From 57044dba82ff8c57326b7bd1fc83e4071be5cc81 Mon Sep 17 00:00:00 2001 From: Antoine Lima <7421319+limaanto@users.noreply.github.com> Date: Wed, 3 Jul 2024 14:30:50 +0200 Subject: [PATCH 1/2] Port hd_monitor to ROS2 (#334) * Port hd_monitor.py * Adapt documentation and CMakeList to new hd_monitor.py * Fix execution flag of hd_monitor * Add launch test for hd_monitor * Implement low and crit parameters in hd_monitor * Improve hd_monitor code quality (cherry picked from commit 05a96450bad1f074156615aaafa00a78b894d312) --- diagnostic_common_diagnostics/CMakeLists.txt | 5 + diagnostic_common_diagnostics/README.md | 15 +- .../hd_monitor.py | 153 ++++++++++++++++++ diagnostic_common_diagnostics/mainpage.dox | 1 + .../systemtest/test_hd_monitor_launchtest.py | 110 +++++++++++++ 5 files changed, 283 insertions(+), 1 deletion(-) create mode 100755 diagnostic_common_diagnostics/diagnostic_common_diagnostics/hd_monitor.py create mode 100644 diagnostic_common_diagnostics/test/systemtest/test_hd_monitor_launchtest.py diff --git a/diagnostic_common_diagnostics/CMakeLists.txt b/diagnostic_common_diagnostics/CMakeLists.txt index 9ab1c21f..26125367 100644 --- a/diagnostic_common_diagnostics/CMakeLists.txt +++ b/diagnostic_common_diagnostics/CMakeLists.txt @@ -12,6 +12,7 @@ install(PROGRAMS ${PROJECT_NAME}/ntp_monitor.py ${PROJECT_NAME}/ram_monitor.py ${PROJECT_NAME}/sensors_monitor.py + ${PROJECT_NAME}/hd_monitor.py DESTINATION lib/${PROJECT_NAME} ) @@ -29,6 +30,10 @@ if(BUILD_TESTING) test/systemtest/test_ntp_monitor_launchtest.py TARGET ntp_monitor_launchtest TIMEOUT 20) + add_launch_test( + test/systemtest/test_hd_monitor_launchtest.py + TARGET hd_monitor_launchtest + TIMEOUT 20) endif() ament_package() diff --git a/diagnostic_common_diagnostics/README.md b/diagnostic_common_diagnostics/README.md index 3b410ee3..fdece7f8 100644 --- a/diagnostic_common_diagnostics/README.md +++ b/diagnostic_common_diagnostics/README.md @@ -67,7 +67,20 @@ Computer name in diagnostics output (ex: 'c1') Disable self test. ## hd_monitor.py -**To be ported** +Runs 'shutil.disk_usage' to check if there is enough space left on a given device. +* Above 5% of free space left, an `OK` status will be published. +* Between 5% and 1%, a `WARN` status will be published, +* Below 1%, an `ERROR` status will be published. + +### Published Topics +#### /diagnostics +diagnostic_msgs/DiagnosticArray +The diagnostics information. + +### Parameters +#### path +(default: home directory "~") +Path in which to check remaining space. ## ram_monitor.py The `ram_monitor` module allows users to monitor the RAM usage of their system in real-time. diff --git a/diagnostic_common_diagnostics/diagnostic_common_diagnostics/hd_monitor.py b/diagnostic_common_diagnostics/diagnostic_common_diagnostics/hd_monitor.py new file mode 100755 index 00000000..24c04d33 --- /dev/null +++ b/diagnostic_common_diagnostics/diagnostic_common_diagnostics/hd_monitor.py @@ -0,0 +1,153 @@ +#! /usr/bin/env python3 +"""Hard Drive (or any other memory) monitor. Contains a the monitor node and its main function.""" +# -*- coding: utf-8 -*- +# +# Software License Agreement (BSD License) +# +# Copyright (c) 2009, Willow Garage, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. +# * Neither the name of the Willow Garage nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +# \author Kevin Watts +# \author Antoine Lima + +from pathlib import Path +from shutil import disk_usage +from socket import gethostname +from typing import List + +from diagnostic_msgs.msg import DiagnosticStatus, KeyValue +from diagnostic_updater import Updater +from rcl_interfaces.msg import SetParametersResult +import rclpy +from rclpy.node import Node + + +FREE_PERCENT_LOW = 0.05 +FREE_PERCENT_CRIT = 0.01 +DICT_STATUS = { + DiagnosticStatus.OK: 'OK', + DiagnosticStatus.WARN: 'Warning', + DiagnosticStatus.ERROR: 'Error', +} +DICT_USAGE = { + DiagnosticStatus.OK: 'OK', + DiagnosticStatus.WARN: 'Low Disk Space', + DiagnosticStatus.ERROR: 'Very Low Disk Space', +} + + +class HDMonitor(Node): + """Diagnostic node checking the remaining space on the specified hard drive. + + Three ROS parameters: + - path: Path on the filesystem to check (string, default: home directory) + - free_percent_low: Percentage at which to consider the space left as low + - free_percent_crit: Percentage at which to consider the space left as critical + """ + + def __init__(self): + hostname = gethostname().replace('.', '_').replace('-', '_') + super().__init__(f'hd_monitor_{hostname}') + + self._path = '~' + self._free_percent_low = 0.05 + self._free_percent_crit = 0.01 + + self.add_on_set_parameters_callback(self.callback_config) + self.declare_parameter('path', self._path) + self.declare_parameter('free_percent_low', self._free_percent_low) + self.declare_parameter('free_percent_crit', self._free_percent_crit) + + self._updater = Updater(self) + self._updater.setHardwareID(hostname) + self._updater.add(f'{hostname} HD Usage', self.check_disk_usage) + + def callback_config(self, params: List[rclpy.Parameter]): + """Retrieve ROS parameters. + + see the class documentation for declared parameters. + """ + for param in params: + match param.name: + case 'path': + self._path = str( + Path(param.value).expanduser().resolve(strict=True) + ) + case 'free_percent_low': + self._free_percent_low = param.value + case 'free_percent_crit': + self._free_percent_crit = param.value + + return SetParametersResult(successful=True) + + def check_disk_usage(self, diag: DiagnosticStatus) -> DiagnosticStatus: + """Compute the disk usage and derive a status from it. + + Task periodically ran by the diagnostic updater. + """ + diag.level = DiagnosticStatus.OK + + total, _, free = disk_usage(self._path) + percent = free / total + + if percent > self._free_percent_low: + diag.level = DiagnosticStatus.OK + elif percent > self._free_percent_crit: + diag.level = DiagnosticStatus.WARN + else: + diag.level = DiagnosticStatus.ERROR + + total_go = total // (1024 * 1024) + diag.values.extend( + [ + KeyValue(key='Name', value=self._path), + KeyValue(key='Status', value=DICT_STATUS[diag.level]), + KeyValue(key='Total (Go)', value=str(total_go)), + KeyValue(key='Available (%)', value=str(round(percent, 2))), + ] + ) + + diag.message = DICT_USAGE[diag.level] + return diag + + +def main(args=None): + """Run the HDMonitor class.""" + rclpy.init(args=args) + + node = HDMonitor() + try: + rclpy.spin(node) + except KeyboardInterrupt: + pass + + +if __name__ == '__main__': + main() diff --git a/diagnostic_common_diagnostics/mainpage.dox b/diagnostic_common_diagnostics/mainpage.dox index 7aa872ce..f9cd677d 100644 --- a/diagnostic_common_diagnostics/mainpage.dox +++ b/diagnostic_common_diagnostics/mainpage.dox @@ -5,6 +5,7 @@ \b diagnostic_common_diagnostics contains a few common diagnostic nodes - cpu_monitor publishes diagnostic messages with the CPU usage of the system. +- hd_monitor publishes diagnostic messages related to the available space on a given storage device. - ntp_monitor publishes diagnostic messages for how well the NTP time sync is working. - tf_monitor used to publish diagnostic messages reporting on the health of the TF tree. It is based on tfwtf. It is not ported to ROS2. diff --git a/diagnostic_common_diagnostics/test/systemtest/test_hd_monitor_launchtest.py b/diagnostic_common_diagnostics/test/systemtest/test_hd_monitor_launchtest.py new file mode 100644 index 00000000..9086c5d7 --- /dev/null +++ b/diagnostic_common_diagnostics/test/systemtest/test_hd_monitor_launchtest.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Software License Agreement (BSD License) +# +# Copyright (c) 2023, Robert Bosch GmbH +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. +# * Neither the name of the Willow Garage nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +import unittest + +from diagnostic_msgs.msg import DiagnosticArray + +import launch + +import launch_ros + +import launch_testing + +from launch_testing_ros import WaitForTopics + +import pytest + +import rclpy + + +@pytest.mark.launch_test +def generate_test_description(): + """Launch the hd_monitor node and return a launch description.""" + return launch.LaunchDescription( + [ + launch_ros.actions.Node( + package='diagnostic_common_diagnostics', + executable='hd_monitor.py', + name='hd_monitor', + output='screen', + parameters=[{'free_percent_low': 0.20, 'free_percent_crit': 0.05}], + ), + launch_testing.actions.ReadyToTest(), + ] + ) + + +class TestHDMonitor(unittest.TestCase): + """Test if the hd_monitor node is publishing diagnostics.""" + + def __init__(self, methodName: str = 'runTest') -> None: + super().__init__(methodName) + self.received_messages = [] + + def _received_message(self, msg): + self.received_messages.append(msg) + + def _get_min_level(self): + levels = [ + int.from_bytes(status.level, 'little') + for diag in self.received_messages + for status in diag.status + ] + if len(levels) == 0: + return -1 + return min(levels) + + def test_topic_published(self): + """Test if the hd_monitor node is publishing diagnostics.""" + with WaitForTopics([('/diagnostics', DiagnosticArray)], timeout=5): + print('Topic found') + + rclpy.init() + test_node = rclpy.create_node('test_node') + test_node.create_subscription( + DiagnosticArray, '/diagnostics', self._received_message, 1 + ) + + while len(self.received_messages) < 10: + rclpy.spin_once(test_node, timeout_sec=1) + if (min_level := self._get_min_level()) == 0: + break + + test_node.destroy_node() + rclpy.shutdown() + print(f'Got {len(self.received_messages)} messages:') + for msg in self.received_messages: + print(msg) + self.assertEqual(min_level, 0) From e496d1aee28515473e0f79470ed136265b687342 Mon Sep 17 00:00:00 2001 From: Christian Henkel <6976069+ct2034@users.noreply.github.com> Date: Wed, 17 Jul 2024 14:51:46 +0200 Subject: [PATCH 2/2] fixing pep257 problems introduced by #334 (#384) Signed-off-by: Christian Henkel --- .../diagnostic_common_diagnostics/hd_monitor.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/diagnostic_common_diagnostics/diagnostic_common_diagnostics/hd_monitor.py b/diagnostic_common_diagnostics/diagnostic_common_diagnostics/hd_monitor.py index 24c04d33..ac4eae74 100755 --- a/diagnostic_common_diagnostics/diagnostic_common_diagnostics/hd_monitor.py +++ b/diagnostic_common_diagnostics/diagnostic_common_diagnostics/hd_monitor.py @@ -64,7 +64,8 @@ class HDMonitor(Node): - """Diagnostic node checking the remaining space on the specified hard drive. + """ + Diagnostic node checking the remaining space on the specified hard drive. Three ROS parameters: - path: Path on the filesystem to check (string, default: home directory) @@ -90,7 +91,8 @@ def __init__(self): self._updater.add(f'{hostname} HD Usage', self.check_disk_usage) def callback_config(self, params: List[rclpy.Parameter]): - """Retrieve ROS parameters. + """ + Retrieve ROS parameters. see the class documentation for declared parameters. """ @@ -108,7 +110,8 @@ def callback_config(self, params: List[rclpy.Parameter]): return SetParametersResult(successful=True) def check_disk_usage(self, diag: DiagnosticStatus) -> DiagnosticStatus: - """Compute the disk usage and derive a status from it. + """ + Compute the disk usage and derive a status from it. Task periodically ran by the diagnostic updater. """