Merge pull request #2587 from albinsuresh/fix/2501/restart-agent-after-update

Restart tedge-agent after self update
thin-edge · Jan 18, 2024 · fd91232 · fd91232 · github-actions · Jan 18, 2024
2 parents 0463eb5 + 2aac2f8
commit fd91232
Show file tree

Hide file tree

Showing 28 changed files with 246 additions and 36 deletions.
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1,23 @@
+tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-apt-plugin_0.0.1_amd64.deb filter=lfs diff=lfs merge=lfs -text
+tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-mapper_0.0.1_amd64.deb filter=lfs diff=lfs merge=lfs -text
+tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-watchdog_0.0.1_amd64.deb filter=lfs diff=lfs merge=lfs -text
+tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge_0.0.1_amd64.deb filter=lfs diff=lfs merge=lfs -text
+tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-agent_0.0.1_amd64.deb filter=lfs diff=lfs merge=lfs -text
+
+tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-agent_0.0.1_arm64.deb filter=lfs diff=lfs merge=lfs -text
+tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-apt-plugin_0.0.1_arm64.deb filter=lfs diff=lfs merge=lfs -text
+tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-mapper_0.0.1_arm64.deb filter=lfs diff=lfs merge=lfs -text
+tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-watchdog_0.0.1_arm64.deb filter=lfs diff=lfs merge=lfs -text
+tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge_0.0.1_arm64.deb filter=lfs diff=lfs merge=lfs -text
+
+tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-apt-plugin_0.0.1_armv6.deb filter=lfs diff=lfs merge=lfs -text
+tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-agent_0.0.1_armv6.deb filter=lfs diff=lfs merge=lfs -text
+tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-mapper_0.0.1_armv6.deb filter=lfs diff=lfs merge=lfs -text
+tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-watchdog_0.0.1_armv6.deb filter=lfs diff=lfs merge=lfs -text
+tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge_0.0.1_armv6.deb filter=lfs diff=lfs merge=lfs -text
+
+tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-agent_0.0.1_armhf.deb filter=lfs diff=lfs merge=lfs -text
+tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-apt-plugin_0.0.1_armhf.deb filter=lfs diff=lfs merge=lfs -text
+tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-mapper_0.0.1_armhf.deb filter=lfs diff=lfs merge=lfs -text
+tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-watchdog_0.0.1_armhf.deb filter=lfs diff=lfs merge=lfs -text
+tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge_0.0.1_armhf.deb filter=lfs diff=lfs merge=lfs -text
diff --git a/Cargo.toml b/Cargo.toml
@@ -188,7 +188,7 @@ zeroize = "1.5"
 codegen-units = 1
 lto = true
 opt-level = "z"
-panic = "abort"
+panic = "unwind"
 strip = "symbols"
 overflow-checks = true
 

diff --git a/crates/core/tedge_actors/src/runtime.rs b/crates/core/tedge_actors/src/runtime.rs
@@ -40,13 +40,13 @@ pub enum RuntimeEvent {
     Error(RuntimeError),
     Started { task: String },
     Stopped { task: String },
-    Aborted { task: String, error: RuntimeError },
+    Aborted { task: String, error: String },
 }
 
 /// The actor runtime
 pub struct Runtime {
     handle: RuntimeHandle,
-    bg_task: JoinHandle<()>,
+    bg_task: JoinHandle<Result<(), RuntimeError>>,
 }
 
 impl Runtime {
@@ -88,17 +88,22 @@ impl Runtime {
     /// - Or, all the runtime handler clones have been dropped
     ///       and all the running tasks have reached completion (successfully or not).
     pub async fn run_to_completion(self) -> Result<(), RuntimeError> {
-        Runtime::wait_for_completion(self.bg_task).await
+        if let Err(err) = Runtime::wait_for_completion(self.bg_task).await {
+            error!("Aborted due to {err}");
+            std::process::exit(1)
+        }
+
+        Ok(())
     }
 
-    async fn wait_for_completion(bg_task: JoinHandle<()>) -> Result<(), RuntimeError> {
-        bg_task.await.map_err(|err| {
-            if err.is_panic() {
-                RuntimeError::RuntimePanic
-            } else {
-                RuntimeError::RuntimeCancellation
-            }
-        })
+    async fn wait_for_completion(
+        bg_task: JoinHandle<Result<(), RuntimeError>>,
+    ) -> Result<(), RuntimeError> {
+        match bg_task.await {
+            Ok(result) => result,
+            Err(err) if err.is_panic() => Err(RuntimeError::RuntimePanic),
+            Err(_) => Err(RuntimeError::RuntimeCancellation),
+        }
     }
 }
 
@@ -167,8 +172,9 @@ impl RuntimeActor {
         }
     }
 
-    async fn run(mut self) {
+    async fn run(mut self) -> Result<(), RuntimeError> {
         info!(target: "Runtime", "Started");
+        let mut aborting_error = None;
         let mut actors_count: usize = 0;
         loop {
             tokio::select! {
@@ -202,7 +208,12 @@ impl RuntimeActor {
                     }
                 },
                 Some(finished_actor) = self.futures.next() => {
-                    self.handle_actor_finishing(finished_actor).await;
+                    if let Err(error) = self.handle_actor_finishing(finished_actor).await {
+                        info!(target: "Runtime", "Shutting down on error: {error}");
+                        aborting_error = Some(error);
+                        shutdown_actors(&mut self.running_actors).await;
+                        break
+                    }
                 }
             }
         }
@@ -216,30 +227,43 @@ impl RuntimeActor {
             }
             _ = self.wait_for_actors_to_finish() => info!(target: "Runtime", "All actors have finished")
         }
+
+        match aborting_error {
+            None => Ok(()),
+            Some(error) => Err(error),
+        }
     }
 
     async fn wait_for_actors_to_finish(&mut self) {
         while let Some(finished_actor) = self.futures.next().await {
-            self.handle_actor_finishing(finished_actor).await;
+            let _ = self.handle_actor_finishing(finished_actor).await;
         }
     }
 
     async fn handle_actor_finishing(
         &mut self,
         finished_actor: Result<Result<String, (String, RuntimeError)>, JoinError>,
-    ) {
+    ) -> Result<(), RuntimeError> {
         match finished_actor {
-            Err(e) => error!(target: "Runtime", "Failed to execute actor: {e}"),
+            Err(e) => {
+                error!(target: "Runtime", "Failed to execute actor: {e}");
+                Err(RuntimeError::JoinError(e))
+            }
             Ok(Ok(actor)) => {
                 self.running_actors.remove(&actor);
                 info!(target: "Runtime", "Actor has finished: {actor}");
                 self.send_event(RuntimeEvent::Stopped { task: actor }).await;
+                Ok(())
             }
             Ok(Err((actor, error))) => {
                 self.running_actors.remove(&actor);
                 error!(target: "Runtime", "Actor {actor} has finished unsuccessfully: {error:?}");
-                self.send_event(RuntimeEvent::Aborted { task: actor, error })
-                    .await;
+                self.send_event(RuntimeEvent::Aborted {
+                    task: actor.clone(),
+                    error: format!("{error}"),
+                })
+                .await;
+                Err(error)
             }
         }
     }
@@ -316,7 +340,15 @@ mod tests {
                         crate::Sender::send(&mut self.messages, EchoMessage::String(message))
                             .await?
                     }
-                    EchoMessage::RuntimeRequest(RuntimeRequest::Shutdown) => break,
+                    EchoMessage::RuntimeRequest(RuntimeRequest::Shutdown) => {
+                        dbg!("shutdown requested");
+                        crate::Sender::send(
+                            &mut self.messages,
+                            EchoMessage::String("Echo stopped".to_string()),
+                        )
+                        .await?;
+                        break;
+                    }
                 }
             }
 
@@ -485,8 +517,8 @@ mod tests {
     #[tokio::test]
     async fn shutdown() {
         let (mut actions_sender, mut events_receiver, ra) = init();
-        let (_, _, actor1) = create_actor(Echo::new);
-        let (_, _, actor2) = create_actor(Echo::new);
+        let (_, _sender1, actor1) = create_actor(Echo::new);
+        let (_, _sender2, actor2) = create_actor(Echo::new);
 
         actions_sender
             .send(RuntimeAction::Spawn(actor1))
@@ -537,26 +569,37 @@ mod tests {
 
         let wait_for_actor_to_panic = async {
             while let Some(event) = events_receiver.next().await {
-                if matches!(event, RuntimeEvent::Aborted { task, .. } if task == "Panic-0") {
-                    break;
+                match event {
+                    RuntimeEvent::Aborted { task, error } if task == "Panic-0" => {
+                        return Some(error);
+                    }
+                    _ => {}
                 }
             }
+            None
         };
 
         tokio::spawn(ra.run());
 
-        tokio::time::timeout(Duration::from_secs(1), wait_for_actor_to_panic)
+        // The panic is caught by the runtime and an event is sent
+        let error = tokio::time::timeout(Duration::from_secs(1), wait_for_actor_to_panic)
             .await
             .expect("Actor to panic in time");
+        assert_eq!(
+            error.map(|s| s.replace(char::is_numeric, "")), // ignore the task id
+            Some("task  panicked".to_string())
+        );
 
-        sender
+        // No more messages can be sent to the actors: they have been shut down
+        assert!(sender
             .send(EchoMessage::String("hello".into()))
             .await
-            .expect("Expected the echo actor to be running and to receive a message");
+            .is_err());
 
+        // The actors have been properly shut down
         assert_eq!(
             receiver.next().await.unwrap(),
-            EchoMessage::String("hello".into())
+            EchoMessage::String("Echo stopped".into())
         );
     }
 }
diff --git a/crates/core/tedge_agent/src/agent.rs b/crates/core/tedge_agent/src/agent.rs
@@ -59,7 +59,7 @@ use tracing::info;
 use tracing::instrument;
 use tracing::warn;
 
-const TEDGE_AGENT: &str = "tedge-agent";
+pub const TEDGE_AGENT: &str = "tedge-agent";
 
 #[derive(Debug, Clone)]
 pub(crate) struct AgentConfig {
@@ -219,7 +219,8 @@ impl Agent {
 
     #[instrument(skip(self), name = "sm-agent")]
     pub async fn start(self) -> Result<(), anyhow::Error> {
-        info!("Starting tedge agent");
+        let version = env!("CARGO_PKG_VERSION");
+        info!("Starting tedge-agent v{}", version);
         self.init()?;
 
         // Runtime

diff --git a/crates/core/tedge_agent/src/software_manager/actor.rs b/crates/core/tedge_agent/src/software_manager/actor.rs
@@ -1,8 +1,10 @@
+use crate::agent::TEDGE_AGENT;
 use crate::software_manager::config::SoftwareManagerConfig;
 use crate::software_manager::error::SoftwareManagerError;
 use crate::software_manager::error::SoftwareManagerError::NoPlugins;
 use crate::state_repository::error::StateError;
 use crate::state_repository::state::AgentStateRepository;
+use anyhow::anyhow;
 use async_trait::async_trait;
 use plugin_sm::operation_logs::LogKind;
 use plugin_sm::operation_logs::OperationLogs;
@@ -11,6 +13,7 @@ use plugin_sm::plugin_manager::Plugins;
 use serde::Deserialize;
 use serde::Serialize;
 use std::path::PathBuf;
+use std::process::Command;
 use tedge_actors::fan_in_message_type;
 use tedge_actors::Actor;
 use tedge_actors::LoggingReceiver;
@@ -101,7 +104,12 @@ impl Actor for SoftwareManagerActor {
 
         while let Some(request) = input_receiver.recv().await {
             tokio::select! {
-                _ = self.handle_request(request, &mut plugins, &operation_logs) => {}
+                _ = self.handle_request(request, &mut plugins, &operation_logs) => {
+                    if let Err(SoftwareManagerError::NotRunningLatestVersion) = Self::detect_self_update() {
+                        error!("Tedge-agent is no more running the latest-version => a restart is required");
+                        return Err(RuntimeError::ActorError(Box::new(SoftwareManagerError::NotRunningLatestVersion)));
+                    }
+                }
 
                 Some(RuntimeRequest::Shutdown) = input_receiver.recv_signal() => {
                     info!("Received shutdown request from the runtime, exiting...");
@@ -147,11 +155,12 @@ impl SoftwareManagerActor {
     ) -> Result<(), SoftwareManagerError> {
         match request {
             SoftwareCommand::SoftwareUpdateCommand(request) => {
-                if let Err(err) = self
+                match self
                     .handle_software_update_operation(request, plugins, operation_logs)
                     .await
                 {
-                    error!("{:?}", err);
+                    Ok(()) => {}
+                    Err(err) => error!("{:?}", err),
                 }
             }
             SoftwareCommand::SoftwareListCommand(request) => {
@@ -232,6 +241,42 @@ impl SoftwareManagerActor {
         Ok(())
     }
 
+    fn detect_self_update() -> Result<(), SoftwareManagerError> {
+        info!("Checking if tedge got self updated");
+        let current_running_version = env!("CARGO_PKG_VERSION");
+        info!("Current running version: {}", current_running_version);
+
+        let executable_path = std::env::current_exe()
+            .map_err(|e| anyhow!("Failed to retrieve running executable path due to {}", e))?;
+        let agent_binary_path = executable_path.parent().unwrap().join(TEDGE_AGENT);
+
+        let output = Command::new(agent_binary_path)
+            .args(["--version"])
+            .output()
+            .map_err(|e| anyhow!("Failed to fetch version of installed binary due to {}", e))?;
+        if !output.status.success() {
+            return Err(anyhow!(
+                "Fetching version from installed binary failed with {}",
+                String::from_utf8_lossy(&output.stderr)
+            )
+            .into());
+        }
+
+        let version_output = String::from_utf8_lossy(&output.stdout);
+        let version_output_split: Vec<&str> = version_output.split_whitespace().collect();
+        if let ["tedge-agent", installed_binary_version] = version_output_split.as_slice() {
+            info!("Installed binary version: {}", installed_binary_version);
+            if current_running_version != *installed_binary_version {
+                info!("Self update detected. Requesting shutdown...");
+                return Err(SoftwareManagerError::NotRunningLatestVersion);
+            }
+        } else {
+            return Err(anyhow!("Unexpected version output: {:?}", version_output).into());
+        }
+
+        Ok(())
+    }
+
     async fn handle_software_list_operation(
         &mut self,
         request: SoftwareListCommand,

diff --git a/crates/core/tedge_agent/src/software_manager/error.rs b/crates/core/tedge_agent/src/software_manager/error.rs
@@ -22,6 +22,9 @@ pub enum SoftwareManagerError {
     #[error(transparent)]
     FromTedgeConfig(#[from] tedge_config::TEdgeConfigError),
 
+    #[error("Tedge-agent is not running the latest version")]
+    NotRunningLatestVersion,
+
     #[error(transparent)]
     Other(#[from] anyhow::Error),
 }

diff --git a/tests/RobotFramework/bin/setup.sh b/tests/RobotFramework/bin/setup.sh
@@ -13,6 +13,12 @@ pushd "$SCRIPT_DIR/.." >/dev/null || exit 1
 # Required to prevent dbus errors on raspberry pi
 export PYTHON_KEYRING_BACKEND=keyring.backends.null.Keyring
 
+# Use git lfs for test artefacts
+curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash
+sudo apt install -y git-lfs
+git lfs install
+git lfs pull
+
 #
 # Setup python virtual environment and install dependencies
 #

diff --git a/tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-agent_0.0.1_amd64.deb b/tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-agent_0.0.1_amd64.deb
diff --git a/tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-agent_0.0.1_arm64.deb b/tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-agent_0.0.1_arm64.deb
diff --git a/tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-agent_0.0.1_armhf.deb b/tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-agent_0.0.1_armhf.deb
diff --git a/tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-agent_0.0.1_armv6.deb b/tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-agent_0.0.1_armv6.deb
diff --git a/tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-apt-plugin_0.0.1_amd64.deb b/tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-apt-plugin_0.0.1_amd64.deb
diff --git a/tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-apt-plugin_0.0.1_arm64.deb b/tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-apt-plugin_0.0.1_arm64.deb
diff --git a/tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-apt-plugin_0.0.1_armhf.deb b/tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-apt-plugin_0.0.1_armhf.deb
diff --git a/tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-apt-plugin_0.0.1_armv6.deb b/tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-apt-plugin_0.0.1_armv6.deb
diff --git a/tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-mapper_0.0.1_amd64.deb b/tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-mapper_0.0.1_amd64.deb
diff --git a/tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-mapper_0.0.1_arm64.deb b/tests/RobotFramework/tests/cumulocity/self-update/base-version/tedge-mapper_0.0.1_arm64.deb