From f1630858baffb0545d440326331d1ec2debb555d Mon Sep 17 00:00:00 2001 From: George Raduta Date: Thu, 12 Dec 2024 16:04:28 +0100 Subject: [PATCH] [OGUI-1590] Add support for final ECS operation (error/timeout) in DCS SOR pane (#2697) * Add support for ECS operation done in SOR * Fix lint issues --- .../lib/adapters/DcsIntegratedEventAdapter.js | 89 ++++++++++++++++++- .../common/ecsOperationAndStepStatus.enum.js | 27 ++++++ .../public/common/enums/DetectorState.enum.js | 6 +- .../Environment/components/dcs/dcsSorPanel.js | 38 ++++++-- 4 files changed, 151 insertions(+), 9 deletions(-) create mode 100644 Control/lib/common/ecsOperationAndStepStatus.enum.js diff --git a/Control/lib/adapters/DcsIntegratedEventAdapter.js b/Control/lib/adapters/DcsIntegratedEventAdapter.js index 1dfc286dd..951c18c00 100644 --- a/Control/lib/adapters/DcsIntegratedEventAdapter.js +++ b/Control/lib/adapters/DcsIntegratedEventAdapter.js @@ -11,8 +11,15 @@ * or submit itself to any jurisdiction. */ +const { + EcsOperationAndStepStatus: { + DONE_ERROR, + DONE_TIMEOUT + } +} = require('../common/ecsOperationAndStepStatus.enum.js'); + /** - * DcsIntegratedEventAdapter - Given an AliECS Integrated Service Event for DCS.SOR, build a DCS Integrated Event + * @class DcsIntegratedEventAdapter - Given an AliECS Integrated Service Event for DCS.SOR, build a DCS Integrated Event * * The DCS SOR event is a special event that comes from either: * * the DCS service itself (when containing the payload "dcsEvent") and it is for one detector only @@ -26,7 +33,77 @@ class DcsIntegratedEventAdapter { } /** - * Build a DCS Integrated Event from an AliECS Integrated Service Event. If it is a DCSevent, the detector will replace detectors array + * Build a DCS Integrated Event from an AliECS Integrated Service Event - SOR. 
If it is a DCSevent, the detector will replace detectors array + * + * // IntegratedService event, related to SOR but with a failure on ECS side (such as timeout) + * @example + * { + * "timestamp": 1733497646607, + * "integratedServiceEvent": { + * "name": "readout-dataflow.dcs.sor", + * "error": "DCS SOR timed out after 1s: rpc error: code = DeadlineExceeded desc = Deadline Exceeded", + * "operationName": "dcs.StartOfRun()", + * "operationStatus": "ONGOING", + * "operationStep": "perform DCS call: StartOfRun", + * "operationStepStatus": "DONE_TIMEOUT", + * "environmentId": "2rRm96N9k7E", + * "payload": "{\"detectors\":[\"EMC\"],\"detectorsReadiness\":{\"EMC\":\"SOR_AVAILABLE\"},\"runNumber\":1601}" + * } + * // IntegratedService event with final state DONE_ERROR following the DONE_TIMEOUT from above + * @example + * { + * "timestamp": 1734004912438, + * "integratedServiceEvent": { + * "name": "readout-dataflow.dcs.sor", + * "error": "DCS SOR timed out after 100ms: rpc error: code = DeadlineExceeded desc = context deadline exceeded : SOR failed for EMC, FDD, DCS EOR will run anyway for this run", + * "operationName": "dcs.StartOfRun()", + * "operationStatus": "DONE_ERROR", + * "operationStep": "perform DCS call: StartOfRun", + * "operationStepStatus": "DONE_ERROR", + * "environmentId": "2rYQabnjWy2", + * "payload": "{\"detectors\":[\"EMC\",\"FDD\"],\"detectorsReadiness\":{\"EMC\":\"SOR_AVAILABLE\",\"FDD\":\"SOR_AVAILABLE\"},\"failedDetectors\":[\"EMC\",\"FDD\"],\"runNumber\":1622}" + * } + * + * // IntegratedService event, related to SOR_PROGRESSING with payload from DCS + * @example + * { + * "timestamp": 1734004912360, + * "timestampNano": 1734004912360675322, + * "environmentEvent": null, + * "taskEvent": null, + * "roleEvent": null, + * "callEvent": null, + * "integratedServiceEvent": { + * "name": "readout-dataflow.dcs.sor", + * "error": null, + * "operationName": "dcs.StartOfRun()", + * "operationStatus": "ONGOING", + * "operationStep": "perform DCS call: 
StartOfRun", + * "operationStepStatus": "ONGOING", + * "environmentId": "2rYQabnjWy2", + * "payload": "{ + * \"dcsEvent\": { + * \"eventtype\":20, + * \"detector\":2, + * \"state\":5, + * \"extraParameters\":{ + * \"run_no\":\"1622\" + * }, + * \"timestamp\":\"2024-12-12 13:01:52.358\", + * \"message\":\"run_type\" + * }, + * \"detector\":\"EMC\", + * \"detectors\":[\"EMC\",\"FDD\"], + * \"detectorsReadiness\":{ + * \"EMC\":\"SOR_AVAILABLE\", + * \"FDD\":\"SOR_AVAILABLE\" + * }, + * \"runNumber\":1622, + * \"state\":\"SOR_PROGRESSING\" + * }" + * } + * + * Final OperationStates: DONE_TIMEOUT/DONE_ERROR/DONE_OK * @param {object} event - AliECS Integrated Service Event * @param {number} timestamp - timestamp of the event (int64 as per proto file definition) * @return {object} DCS Integrated Event @@ -37,7 +114,13 @@ class DcsIntegratedEventAdapter { const payloadJSON = JSON.parse(payload); const { dcsEvent, runNumber, detector = null, state } = payloadJSON; - if (!dcsEvent) { + + if (!dcsEvent + && operationStatus !== DONE_ERROR && operationStatus !== DONE_TIMEOUT + && operationStepStatus !== DONE_ERROR && operationStepStatus !== DONE_TIMEOUT + ) { + // if there is no DCS event and status is not final error or timeout, we ignore the event as we expect to have `RUN_OK` from DCS as final state + // or DONE_TIMEOUT or DONE_ERROR from ECS. We are not interested in DONE_OK from ECS as this means all detectors in RUN_OK which we already look for return null; } let { detectors } = payloadJSON; diff --git a/Control/lib/common/ecsOperationAndStepStatus.enum.js b/Control/lib/common/ecsOperationAndStepStatus.enum.js new file mode 100644 index 000000000..4b38f9db8 --- /dev/null +++ b/Control/lib/common/ecsOperationAndStepStatus.enum.js @@ -0,0 +1,27 @@ +/** + * @license + * Copyright 2019-2020 CERN and copyright holders of ALICE O2. + * See http://alice-o2.web.cern.ch/copyright for details of the copyright holders. + * All rights not expressly granted are reserved. 
+ * + * This software is distributed under the terms of the GNU General Public + * License v3 (GPL Version 3), copied verbatim in the file "COPYING". + * + * In applying this license CERN does not waive the privileges and immunities + * granted to it by virtue of its status as an Intergovernmental Organization + * or submit itself to any jurisdiction. +*/ + +/** + * Available ECS Statuses of operations for Kafka Events + * These operations can be under the label: + * * operationStatus + * * operationStepStatus + */ +const EcsOperationAndStepStatus = Object.freeze({ + DONE_OK: 'DONE_OK', + DONE_ERROR: 'DONE_ERROR', + DONE_TIMEOUT: 'DONE_TIMEOUT', +}); + +exports.EcsOperationAndStepStatus = EcsOperationAndStepStatus; diff --git a/Control/public/common/enums/DetectorState.enum.js b/Control/public/common/enums/DetectorState.enum.js index 95e2ddbc4..c2a4d1630 100644 --- a/Control/public/common/enums/DetectorState.enum.js +++ b/Control/public/common/enums/DetectorState.enum.js @@ -62,5 +62,9 @@ export const DetectorStateStyle = Object.freeze({ EOR_AVAILABLE: '', PFR_AVAILABLE: '', PFR_UNAVAILABLE: '', - TIMEOUT: '', + TIMEOUT: 'bg-danger white', + // Custom states for the SOR/EOR operations covered by ECS when DCS does not reply + DONE_TIMEOUT: 'bg-danger white', + DONE_ERROR: 'bg-danger white', + DONE_OK: 'bg-primary white', }); diff --git a/Control/public/pages/Environment/components/dcs/dcsSorPanel.js b/Control/public/pages/Environment/components/dcs/dcsSorPanel.js index d923d9911..05a7197b6 100644 --- a/Control/public/pages/Environment/components/dcs/dcsSorPanel.js +++ b/Control/public/pages/Environment/components/dcs/dcsSorPanel.js @@ -23,7 +23,7 @@ import { infoLoggerButtonLink } from './../../../../common/buttons/infoLoggerRed /** * Panel that will display DCS last states during the SOR activity at the start of run * @param {string} id - environment id - * @param {array} detectors - list of detectors + * @param {array} detectors - list of detectors as received by 
the environment currently displayed in variable `includedDetectors` * @return {vnode} */ export const dcsSorPanel = (id, detectors) => { @@ -67,18 +67,46 @@ export const dcsSorPanel = (id, detectors) => { } /** - * Group operations by detector + * Group events by detector and filter out events that arrive after a final event, because + * some detectors might end the SOR sequence and arrive in RUN_OK, DONE_TIMEOUT, DONE_ERROR state but still receive from ECS an event saying that it failed. + * This is incorrect from ECS and should be filtered out. * @param {array} operations - list of operations * @return {object} */ const groupOperationsByDetector = (operations) => { const groupedOperations = {}; - operations.forEach((operation) => { - operation.detectors.forEach((detector) => { + operations.forEach((event) => { + const eventCopy = JSON.parse(JSON.stringify(event)); + eventCopy.detectors.forEach((detector) => { if (!groupedOperations[detector]) { groupedOperations[detector] = []; + if (!eventCopy?.state) { + // first operation might be an error or timeout which comes without a state + eventCopy.state = eventCopy.operationStepStatus ?? 
eventCopy.operationStatus; + } + groupedOperations[detector].push(eventCopy); + } else { + const lastOperation = groupedOperations[detector][groupedOperations[detector].length - 1]; + if (eventCopy.state) { + // If there is a state, it means it is still an event from DCS + groupedOperations[detector].push(eventCopy); + } else if ( + lastOperation?.state !== 'RUN_OK' + && lastOperation?.state !== 'DONE_TIMEOUT' + && lastOperation?.state !== 'DONE_ERROR' + ) { + // we only add an ECS event or step with status DONE_TIMEOUT or DONE_ERROR if the last event state of that detector is not already final (RUN_OK, DONE_TIMEOUT, DONE_ERROR) + const operationStatus = eventCopy.operationStatus; + const operationStepStatus = eventCopy.operationStepStatus; + // priority is given to operationStep as it offers more granularity + if (operationStepStatus === 'DONE_TIMEOUT' || operationStepStatus === 'DONE_ERROR') { + eventCopy.state = operationStepStatus; + } else if (operationStatus === 'DONE_TIMEOUT' || operationStatus === 'DONE_ERROR') { + eventCopy.state = operationStatus; + } + groupedOperations[detector].push(eventCopy); + } } - groupedOperations[detector].push(operation); }); }); return groupedOperations;