Skip to content

Commit

Permalink
[OGUI-1590] Add support for final ECS operation (error/timeout) in DC…
Browse files Browse the repository at this point in the history
…S SOR pane (#2697)

* Add support for ECS operation done in SOR
* Fix lint issues
  • Loading branch information
graduta authored Dec 12, 2024
1 parent 4f7f920 commit f163085
Show file tree
Hide file tree
Showing 4 changed files with 151 additions and 9 deletions.
89 changes: 86 additions & 3 deletions Control/lib/adapters/DcsIntegratedEventAdapter.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,15 @@
* or submit itself to any jurisdiction.
*/

const {
EcsOperationAndStepStatus: {
DONE_ERROR,
DONE_TIMEOUT
}
} = require('../common/ecsOperationAndStepStatus.enum.js');

/**
* DcsIntegratedEventAdapter - Given an AliECS Integrated Service Event for DCS.SOR, build a DCS Integrated Event
* @class DcsIntegratedEventAdapter - Given an AliECS Integrated Service Event for DCS.SOR, build a DCS Integrated Event
*
* The DCS SOR event is a special event that comes from either:
* * the DCS service itself (when containing the payload "dcsEvent") and it is for one detector only
Expand All @@ -26,7 +33,77 @@ class DcsIntegratedEventAdapter {
}

/**
* Build a DCS Integrated Event from an AliECS Integrated Service Event. If it is a DCSevent, the detector will replace detectors array
* Build a DCS Integrated Event from an AliECS Integrated Service Event - SOR. If it is a DCSevent, the detector will replace detectors array
*
* // IntegratedService event, related to SOR but with a failure on ECS side (such as timeout)
* @example
* {
* "timestamp": 1733497646607,
* "integratedServiceEvent": {
* "name": "readout-dataflow.dcs.sor",
* "error": "DCS SOR timed out after 1s: rpc error: code = DeadlineExceeded desc = Deadline Exceeded",
* "operationName": "dcs.StartOfRun()",
* "operationStatus": "ONGOING",
* "operationStep": "perform DCS call: StartOfRun",
* "operationStepStatus": "DONE_TIMEOUT",
* "environmentId": "2rRm96N9k7E",
* "payload": "{\"detectors\":[\"EMC\"],\"detectorsReadiness\":{\"EMC\":\"SOR_AVAILABLE\"},\"runNumber\":1601}"
* }
* // IntegratedService event with final state DONE_ERROR following the DONE_TIMEOUT from above
* @example
* {
* "timestamp": 1734004912438,
* "integratedServiceEvent": {
* "name": "readout-dataflow.dcs.sor",
* "error": "DCS SOR timed out after 100ms: rpc error: code = DeadlineExceeded desc = context deadline exceeded : SOR failed for EMC, FDD, DCS EOR will run anyway for this run",
* "operationName": "dcs.StartOfRun()",
* "operationStatus": "DONE_ERROR",
* "operationStep": "perform DCS call: StartOfRun",
* "operationStepStatus": "DONE_ERROR",
* "environmentId": "2rYQabnjWy2",
* "payload": "{\"detectors\":[\"EMC\",\"FDD\"],\"detectorsReadiness\":{\"EMC\":\"SOR_AVAILABLE\",\"FDD\":\"SOR_AVAILABLE\"},\"failedDetectors\":[\"EMC\",\"FDD\"],\"runNumber\":1622}"
* }
*
* // IntegratedService event, related to SOR_PROGRESSING with payload from DCS
* @example
* {
* "timestamp": 1734004912360,
* "timestampNano": 1734004912360675322,
* "environmentEvent": null,
* "taskEvent": null,
* "roleEvent": null,
* "callEvent": null,
* "integratedServiceEvent": {
* "name": "readout-dataflow.dcs.sor",
* "error": null,
* "operationName": "dcs.StartOfRun()",
* "operationStatus": "ONGOING",
* "operationStep": "perform DCS call: StartOfRun",
* "operationStepStatus": "ONGOING",
* "environmentId": "2rYQabnjWy2",
* "payload": "{
* \"dcsEvent\": {
* \"eventtype\":20,
* \"detector\":2,
* \"state\":5,
* \"extraParameters\":{
* \"run_no\":\"1622\"
* },
* \"timestamp\":\"2024-12-12 13:01:52.358\",
* \"message\":\"run_type\"
* },
* \"detector\":\"EMC\",
* \"detectors\":[\"EMC\",\"FDD\"],
* \"detectorsReadiness\":{
* \"EMC\":\"SOR_AVAILABLE\",
* \"FDD\":\"SOR_AVAILABLE\"
* },
* \"runNumber\":1622,
* \"state\":\"SOR_PROGRESSING\"
* }"
* }
*
* Final OperationStates: DONE_TIMEOUT/DONE_ERROR/DONE_OK
* @param {object} event - AliECS Integrated Service Event
* @param {number} timestamp - timestamp of the event (int64 as per proto file definition)
* @return {object} DCS Integrated Event
Expand All @@ -37,7 +114,13 @@ class DcsIntegratedEventAdapter {

const payloadJSON = JSON.parse(payload);
const { dcsEvent, runNumber, detector = null, state } = payloadJSON;
if (!dcsEvent) {

if (!dcsEvent
&& operationStatus !== DONE_ERROR && operationStatus !== DONE_TIMEOUT
&& operationStepStatus !== DONE_ERROR && operationStepStatus !== DONE_TIMEOUT
) {
// if there is no DCS event and status is not final error or timeout, we ignore the event as we expect to have `RUN_OK` from DCS as final state
// or DONE_TIMEOUT or DONE_ERROR from ECS. We are not interested in DONE_OK from ECS as this means all detectors in RUN_OK which we already look for
return null;
}
let { detectors } = payloadJSON;
Expand Down
27 changes: 27 additions & 0 deletions Control/lib/common/ecsOperationAndStepStatus.enum.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/**
* @license
* Copyright 2019-2020 CERN and copyright holders of ALICE O2.
* See http://alice-o2.web.cern.ch/copyright for details of the copyright holders.
* All rights not expressly granted are reserved.
*
* This software is distributed under the terms of the GNU General Public
* License v3 (GPL Version 3), copied verbatim in the file "COPYING".
*
* In applying this license CERN does not waive the privileges and immunities
* granted to it by virtue of its status as an Intergovernmental Organization
* or submit itself to any jurisdiction.
*/

/**
 * Final statuses that AliECS reports for an operation or for one of its steps in Kafka events.
 *
 * The same set of values is used by two event fields:
 * * `operationStatus`
 * * `operationStepStatus`
 *
 * The object is frozen, so members cannot be added, removed or reassigned at runtime.
 * @readonly
 * @enum {string}
 */
const EcsOperationAndStepStatus = Object.freeze({
  // operation/step ended successfully
  DONE_OK: 'DONE_OK',
  // operation/step ended with an error
  DONE_ERROR: 'DONE_ERROR',
  // operation/step ended because it timed out
  DONE_TIMEOUT: 'DONE_TIMEOUT',
});

exports.EcsOperationAndStepStatus = EcsOperationAndStepStatus;
6 changes: 5 additions & 1 deletion Control/public/common/enums/DetectorState.enum.js
Original file line number Diff line number Diff line change
Expand Up @@ -62,5 +62,9 @@ export const DetectorStateStyle = Object.freeze({
EOR_AVAILABLE: '',
PFR_AVAILABLE: '',
PFR_UNAVAILABLE: '',
TIMEOUT: '',
TIMEOUT: 'bg-danger white',
// Custom states for the SOR/EOR operations covered by ECS when DCS does not reply
DONE_TIMEOUT: 'bg-danger white',
DONE_ERROR: 'bg-danger white',
DONE_OK: 'bg-primary white',
});
38 changes: 33 additions & 5 deletions Control/public/pages/Environment/components/dcs/dcsSorPanel.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import { infoLoggerButtonLink } from './../../../../common/buttons/infoLoggerRed
/**
* Panel that will display DCS last states during the SOR activity at the start of run
* @param {string} id - environment id
* @param {array<string>} detectors - list of detectors
* @param {array<string>} detectors - list of detectors as received by the environment currently displayed in variable `includedDetectors`
* @return {vnode}
*/
export const dcsSorPanel = (id, detectors) => {
Expand Down Expand Up @@ -67,18 +67,46 @@ export const dcsSorPanel = (id, detectors) => {
}

/**
* Group operations by detector
* Group events by detector and filter out events that arrive after a final event: some detectors
* might end the SOR sequence in a RUN_OK, DONE_TIMEOUT or DONE_ERROR state but still receive an event from ECS reporting that it failed.
* This is incorrect from ECS and such events should be filtered out.
* @param {array<object>} operations - list of operations
* @return {object}
*/
const groupOperationsByDetector = (operations) => {
const groupedOperations = {};
operations.forEach((operation) => {
operation.detectors.forEach((detector) => {
operations.forEach((event) => {
const eventCopy = JSON.parse(JSON.stringify(event));
eventCopy.detectors.forEach((detector) => {
if (!groupedOperations[detector]) {
groupedOperations[detector] = [];
if (!eventCopy?.state) {
// first operation might be an error or timeout which comes without a state
eventCopy.state = eventCopy.operationStepStatus ?? eventCopy.operationStatus;
}
groupedOperations[detector].push(eventCopy);
} else {
const lastOperation = groupedOperations[detector][groupedOperations[detector].length - 1];
if (eventCopy.state) {
// If there is a state, it means it is still an event from DCS
groupedOperations[detector].push(eventCopy);
} else if (
lastOperation?.state !== 'RUN_OK'
&& lastOperation?.state !== 'DONE_TIMEOUT'
&& lastOperation?.state !== 'DONE_ERROR'
) {
// we only add an event or step with status DONE_TIMEOUT or DONE_ERROR if the last event state of that detector is not already final (RUN_OK, DONE_TIMEOUT or DONE_ERROR), e.g. it is still SOR_PROGRESSING
const operationStatus = eventCopy.operationStatus;
const operationStepStatus = eventCopy.operationStepStatus;
// priority is given to operationStep as it offers more granularity
if (operationStepStatus === 'DONE_TIMEOUT' || operationStepStatus === 'DONE_ERROR') {
eventCopy.state = operationStepStatus;
} else if (operationStatus === 'DONE_TIMEOUT' || operationStatus === 'DONE_ERROR') {
eventCopy.state = operationStatus;
}
groupedOperations[detector].push(eventCopy);
}
}
groupedOperations[detector].push(operation);
});
});
return groupedOperations;
Expand Down

0 comments on commit f163085

Please sign in to comment.