diff --git a/src/app.ts b/src/app.ts index 939b487..f51a487 100644 --- a/src/app.ts +++ b/src/app.ts @@ -5,6 +5,7 @@ import Handlers from './handlers'; import Validator from './validator'; import Redis, { RedisOptions } from 'ioredis'; import { RedisClient, ClientOpts } from 'redis'; +import * as promClient from 'prom-client'; import AutoscalerLogger from './logger'; import shortid from 'shortid'; import { ASAPPubKeyFetcher } from './asap'; @@ -234,6 +235,11 @@ async function startProcessingGroups() { logger.info(`Waiting ${config.InitialWaitForPooling}ms before starting to loop for group processing`); setTimeout(startProcessingGroups, config.InitialWaitForPooling); +const groupProcessingErrorCounter = new promClient.Counter({ + name: 'autoscaler_group_processing_errors', + help: 'Counter for high level group processing errors', +}); + async function createGroupProcessingJobs() { const start = Date.now(); const pollId = shortid.generate(); @@ -245,6 +251,8 @@ async function createGroupProcessingJobs() { await jobManager.createGroupProcessingJobs(ctx); } catch (err) { ctx.logger.error(`Error while creating group processing jobs`, { err }); + // record the failure in the high level group processing error counter + groupProcessingErrorCounter.inc(); } setTimeout(createGroupProcessingJobs, config.GroupJobsCreationIntervalSec * 1000); } @@ -287,7 +295,7 @@ const h = new Handlers({ scalingManager, }); -const validator = new Validator({ instanceTracker, instanceGroupManager, metricsLoop }); +const validator = new Validator({ instanceTracker, instanceGroupManager, metricsLoop, shutdownManager }); const loggedPaths = ['/sidecar*', '/groups*']; app.use(loggedPaths, stats.middleware); app.use('/', context.injectContext); diff --git a/src/group_report.ts b/src/group_report.ts index d296df8..283cd3b 100644 --- a/src/group_report.ts +++ b/src/group_report.ts @@ -89,6 +89,7 @@ export default class GroupReportGenerator { expiredCount: 0, unTrackedCount: 0, shuttingDownCount: 0, + 
shutdownCount: 0, shutdownErrorCount: 0, reconfigureErrorCount: 0, reconfigureScheduledCount: 0, diff --git a/src/instance_tracker.ts b/src/instance_tracker.ts index c6650c3..f8770cd 100644 --- a/src/instance_tracker.ts +++ b/src/instance_tracker.ts @@ -684,7 +684,7 @@ export class InstanceTracker { for (let i = 0; i < states.length; i++) { statesShutdownStatus.push(this.shutdownStatusFromState(states[i]) || shutdownStatuses[i]); } - return states.filter((instanceState, index) => !statesShutdownStatus[index] && !shutdownConfirmations[index]); + return states.filter((_, index) => !statesShutdownStatus[index] && !shutdownConfirmations[index]); } mapToInstanceDetails(states: Array): Array { diff --git a/src/validator.ts b/src/validator.ts index ca60500..966bf8c 100644 --- a/src/validator.ts +++ b/src/validator.ts @@ -4,11 +4,13 @@ import { Request } from 'express'; import InstanceGroupManager, { InstanceGroup } from './instance_group'; import { InstanceGroupDesiredValuesRequest } from './handlers'; import MetricsLoop from './metrics_loop'; +import ShutdownManager from './shutdown_manager'; export interface ValidatorOptions { instanceTracker: InstanceTracker; metricsLoop: MetricsLoop; instanceGroupManager: InstanceGroupManager; + shutdownManager: ShutdownManager; scaleStatus?: string; cloudStatus?: string; isShuttingDown?: boolean; @@ -19,11 +21,13 @@ export default class Validator { private instanceTracker: InstanceTracker; private instanceGroupManager: InstanceGroupManager; private metricsLoop: MetricsLoop; + private shutdownManager: ShutdownManager; constructor(options: ValidatorOptions) { this.instanceTracker = options.instanceTracker; this.instanceGroupManager = options.instanceGroupManager; this.metricsLoop = options.metricsLoop; + this.shutdownManager = options.shutdownManager; this.groupHasActiveInstances = this.groupHasActiveInstances.bind(this); } @@ -37,10 +41,15 @@ export default class Validator { }) .map((cv, _) => cv.instanceId); + const instanceIds = 
instanceStates.map((v, _) => v.instanceId); + + const shutdownConfirmations = await this.shutdownManager.getShutdownConfirmations(context, instanceIds); + return ( - instanceStates.filter((v, _) => { + instanceStates.filter((v, i) => { // skip any that have completed shutdown if (v.shutdownComplete) return false; + if (shutdownConfirmations[i]) return false; // only include instances that are not listed as SHUTDOWN or TERMINATED return !shutdownInstances.includes(v.instanceId);