diff --git a/src/app.ts b/src/app.ts index fabc505..b82eaef 100644 --- a/src/app.ts +++ b/src/app.ts @@ -17,6 +17,7 @@ import InstanceLauncher from './instance_launcher'; import LockManager from './lock_manager'; import * as stats from './stats'; import ShutdownManager from './shutdown_manager'; +import ReconfigureManager from './reconfigure_manager'; import JobManager from './job_manager'; import GroupReportGenerator from './group_report'; import Audit from './audit'; @@ -69,7 +70,7 @@ if (config.RedisDb) { const redisClient = new Redis(redisOptions); const audit = new Audit({ - redisClient: redisClient, + redisClient, redisScanCount: config.RedisScanCount, auditTTL: config.AuditTTL, groupRelatedDataTTL: config.GroupRelatedDataTTL, @@ -78,14 +79,20 @@ const audit = new Audit({ const shutdownManager = new ShutdownManager({ redisClient, shutdownTTL: config.ShutDownTTL, - audit: audit, + audit, +}); + +const reconfigureManager = new ReconfigureManager({ + redisClient, + reconfigureTTL: config.ReconfigureTTL, + audit, }); const instanceTracker = new InstanceTracker({ redisClient, redisScanCount: config.RedisScanCount, - shutdownManager: shutdownManager, - audit: audit, + shutdownManager, + audit, idleTTL: config.IdleTTL, metricTTL: config.MetricTTL, provisioningTTL: config.ProvisioningTTL, @@ -94,29 +101,27 @@ const instanceTracker = new InstanceTracker({ }); const cloudManager = new CloudManager({ - shutdownManager: shutdownManager, + shutdownManager, isDryRun: config.DryRun, ociConfigurationFilePath: config.OciConfigurationFilePath, ociConfigurationProfile: config.OciConfigurationProfile, digitalOceanAPIToken: config.DigitalOceanAPIToken, digitalOceanConfigurationFilePath: config.DigitalOceanConfigurationFilePath, - - instanceTracker: instanceTracker, - audit: audit, + instanceTracker, + audit, cloudProviders: config.CloudProviders, - customConfigurationLaunchScriptPath: config.CustomConfigurationLaunchScriptPath, customConfigurationLaunchScriptTimeoutMs: config.CustomConfigurationLaunchScriptTimeoutMs, }); const lockManager: LockManager = new LockManager(logger, { - redisClient: redisClient, + redisClient, jobCreationLockTTL: config.JobsCreationLockTTLMs, groupLockTTLMs: config.GroupLockTTLMs, }); const instanceGroupManager = new InstanceGroupManager({ - redisClient: redisClient, + redisClient, redisScanCount: config.RedisScanCount, initialGroupList: config.GroupList, groupJobsCreationGracePeriod: config.GroupJobsCreationGracePeriodSec, @@ -135,12 +140,12 @@ instanceGroupManager.init(initCtx).catch((err) => { }); const autoscaleProcessor = new AutoscaleProcessor({ - instanceTracker: instanceTracker, - cloudManager: cloudManager, - instanceGroupManager: instanceGroupManager, - lockManager: lockManager, + instanceTracker, + cloudManager, + instanceGroupManager, + lockManager, redisClient, - audit: audit, + audit, }); const metricsLoop = new MetricsLoop({ @@ -153,45 +158,46 @@ const metricsLoop = new MetricsLoop({ const instanceLauncher = new InstanceLauncher({ maxThrottleThreshold: config.MaxThrottleThreshold, - instanceTracker: instanceTracker, - cloudManager: cloudManager, - instanceGroupManager: instanceGroupManager, - lockManager: lockManager, + instanceTracker, + cloudManager, + instanceGroupManager, + lockManager, redisClient, shutdownManager, - audit: audit, + audit, metricsLoop, }); const groupReportGenerator = new GroupReportGenerator({ - instanceTracker: instanceTracker, - shutdownManager: shutdownManager, - metricsLoop: metricsLoop, + instanceTracker, + shutdownManager, + reconfigureManager, + metricsLoop, }); const sanityLoop = new SanityLoop({ - redisClient: redisClient, + redisClient, metricsTTL: config.ServiceLevelMetricsTTL, - cloudManager: cloudManager, + cloudManager, reportExtCallRetryStrategy: { maxTimeInSeconds: config.ReportExtCallMaxTimeInSeconds, maxDelayInSeconds: config.ReportExtCallMaxDelayInSeconds, retryableStatusCodes: config.ReportExtCallRetryableStatusCodes, }, - groupReportGenerator: groupReportGenerator, - instanceGroupManager: instanceGroupManager, + groupReportGenerator, + instanceGroupManager, }); // Each Queue in JobManager has its own Redis connection (other than the one in RedisClient) // Bee-Queue also uses different a Redis library, so we map redisOptions to the object expected by Bee-Queue const jobManager = new JobManager({ queueRedisOptions: redisQueueOptions, - lockManager: lockManager, - instanceGroupManager: instanceGroupManager, - instanceLauncher: instanceLauncher, + lockManager, + instanceGroupManager, + instanceLauncher, autoscaler: autoscaleProcessor, - sanityLoop: sanityLoop, - metricsLoop: metricsLoop, + sanityLoop, + metricsLoop, autoscalerProcessingTimeoutMs: config.GroupProcessingTimeoutMs, launcherProcessingTimeoutMs: config.GroupProcessingTimeoutMs, sanityLoopProcessingTimeoutMs: config.SanityProcessingTimoutMs, @@ -252,13 +258,14 @@ async function pollForMetrics(metricsLoop: MetricsLoop) { } const h = new Handlers({ - instanceTracker: instanceTracker, - instanceGroupManager: instanceGroupManager, - shutdownManager: shutdownManager, - groupReportGenerator: groupReportGenerator, - lockManager: lockManager, - audit: audit, - scalingManager: scalingManager, + instanceTracker, + instanceGroupManager, + shutdownManager, + reconfigureManager, + groupReportGenerator, + lockManager, + audit, + scalingManager, }); const validator = new Validator({ instanceTracker, instanceGroupManager }); @@ -551,6 +558,18 @@ app.put( }, ); +app.post('/groups/:name/actions/reconfigure-instances', async (req, res, next) => { + try { + const errors = validationResult(req); + if (!errors.isEmpty()) { + return res.status(400).json({ errors: errors.array() }); + } + await h.reconfigureInstanceGroup(req, res); + } catch (err) { + next(err); + } +}); + app.listen(config.HTTPServerPort, () => { logger.info(`...listening on :${config.HTTPServerPort}`); }); diff --git a/src/audit.ts b/src/audit.ts index 6a554b2..0c4ff69 100644 --- a/src/audit.ts +++ b/src/audit.ts @@ -37,6 +37,7 @@ export interface LauncherActionItem { export interface GroupAuditResponse { lastLauncherRun: string; lastAutoScalerRun: string; + lastReconfigureRequest: string; autoScalerActionItems?: AutoScalerActionItem[]; launcherActionItems?: LauncherActionItem[]; } @@ -46,6 +47,8 @@ export interface InstanceAuditResponse { requestToLaunch: string; latestStatus: string; requestToTerminate: string; + requestToReconfigure: string; + reconfigureComplete: string; latestStatusInfo?: InstanceState; } @@ -82,12 +85,23 @@ export default class Audit { this.auditTTL, ); if (latestStatusSaved) { - this.increaseLaunchEventExpiration(groupName, instanceId); - this.increaseShutdownEventExpiration(groupName, instanceId); + this.increaseInstanceExpirations(groupName, instanceId); } return latestStatusSaved; } + async increaseInstanceExpirations(groupName: string, instanceId: string): Promise { + const pipeline = this.redisClient.pipeline(); + + pipeline.expire(`audit:${groupName}:${instanceId}:request-to-launch`, this.auditTTL); + pipeline.expire(`audit:${groupName}:${instanceId}:request-to-terminate`, this.auditTTL); + pipeline.expire(`audit:${groupName}:${instanceId}:request-to-reconfigure`, this.auditTTL); + pipeline.expire(`audit:${groupName}:${instanceId}:reconfigure-complete`, this.auditTTL); + + await pipeline.exec(); + + return true; + } async saveLaunchEvent(groupName: string, instanceId: string): Promise { const value: InstanceAudit = { instanceId: instanceId, @@ -97,15 +111,6 @@ export default class Audit { return this.setInstanceValue(`audit:${groupName}:${instanceId}:request-to-launch`, value, this.auditTTL); } - private async increaseLaunchEventExpiration(groupName: string, instanceId: string): Promise { - // we don't care if this fails (e.g. perhaps the event no longer is there) - const result = await this.redisClient.expire( - `audit:${groupName}:${instanceId}:request-to-launch`, - this.auditTTL, - ); - return result == 1; - } - async saveShutdownEvents(instanceDetails: Array): Promise { const pipeline = this.redisClient.pipeline(); for (const instance of instanceDetails) { @@ -124,13 +129,36 @@ export default class Audit { await pipeline.exec(); } - private async increaseShutdownEventExpiration(groupName: string, instanceId: string): Promise { - // we don't care if this fails (e.g. perhaps the event no longer is there) - const result = await this.redisClient.expire( - `audit:${groupName}:${instanceId}:request-to-terminate`, + async saveUnsetReconfigureEvents(instanceId: string, group: string): Promise { + const value: InstanceAudit = { + instanceId: instanceId, + type: 'reconfigure-complete', + timestamp: Date.now(), + }; + await this.redisClient.set( + `audit:${group}:${instanceId}:reconfigure-complete`, + JSON.stringify(value), + 'ex', this.auditTTL, ); - return result == 1; + } + + async saveReconfigureEvents(instanceDetails: Array): Promise { + const pipeline = this.redisClient.pipeline(); + for (const instance of instanceDetails) { + const value: InstanceAudit = { + instanceId: instance.instanceId, + type: 'request-to-reconfigure', + timestamp: Date.now(), + }; + pipeline.set( + `audit:${instance.group}:${instance.instanceId}:request-to-reconfigure`, + JSON.stringify(value), + 'ex', + this.auditTTL, + ); + } + await pipeline.exec(); } async setInstanceValue(key: string, value: InstanceAudit, ttl: number): Promise { @@ -159,6 +187,17 @@ export default class Audit { return true; } + async updateLastReconfigureRequest(ctx: Context, groupName: string): Promise { + const value: GroupAudit = { + groupName: groupName, + type: 'last-reconfigure-request', + }; + const updateResponse = this.setGroupValue(groupName, value); + ctx.logger.info(`Updated last reconfiguration request for group ${groupName}`); + + return updateResponse; + } + async updateLastLauncherRun(ctx: Context, groupName: string): Promise { const updateLastLaunchStart = process.hrtime(); @@ -257,6 +296,8 @@ export default class Audit { requestToLaunch: 'unknown', latestStatus: 'unknown', requestToTerminate: 'unknown', + requestToReconfigure: 'unknown', + reconfigureComplete: 'unknown', }; instanceAuditResponseList.push(instanceAuditResponse); }); @@ -267,13 +308,19 @@ export default class Audit { )) { switch (instanceAudit.type) { case 'request-to-launch': - instanceAuditResponse.requestToLaunch = new Date(instanceAudit.timestamp).toUTCString(); + instanceAuditResponse.requestToLaunch = new Date(instanceAudit.timestamp).toISOString(); break; case 'request-to-terminate': - instanceAuditResponse.requestToTerminate = new Date(instanceAudit.timestamp).toUTCString(); + instanceAuditResponse.requestToTerminate = new Date(instanceAudit.timestamp).toISOString(); + break; + case 'request-to-reconfigure': + instanceAuditResponse.requestToReconfigure = new Date(instanceAudit.timestamp).toISOString(); + break; + case 'reconfigure-complete': + instanceAuditResponse.reconfigureComplete = new Date(instanceAudit.timestamp).toISOString(); break; case 'latest-status': - instanceAuditResponse.latestStatus = new Date(instanceAudit.timestamp).toUTCString(); + instanceAuditResponse.latestStatus = new Date(instanceAudit.timestamp).toISOString(); instanceAuditResponse.latestStatusInfo = instanceAudit.state; break; } @@ -289,6 +336,7 @@ export default class Audit { const groupAuditResponse: GroupAuditResponse = { lastLauncherRun: 'unknown', lastAutoScalerRun: 'unknown', + lastReconfigureRequest: 'unknown', }; const autoScalerActionItems: AutoScalerActionItem[] = []; @@ -296,10 +344,13 @@ export default class Audit { for (const groupAudit of groupAudits) { switch (groupAudit.type) { case 'last-launcher-run': - groupAuditResponse.lastLauncherRun = new Date(groupAudit.timestamp).toUTCString(); + groupAuditResponse.lastLauncherRun = new Date(groupAudit.timestamp).toISOString(); break; case 'last-autoScaler-run': - groupAuditResponse.lastAutoScalerRun = new Date(groupAudit.timestamp).toUTCString(); + groupAuditResponse.lastAutoScalerRun = new Date(groupAudit.timestamp).toISOString(); + break; + case 'last-reconfigure-request': + groupAuditResponse.lastReconfigureRequest = new Date(groupAudit.timestamp).toISOString(); break; case 'launcher-action-item': launcherActionItems.push(groupAudit.launcherActionItem); @@ -312,12 +363,12 @@ export default class Audit { autoScalerActionItems .sort((a, b) => (a.timestamp > b.timestamp ? -1 : 1)) .map(function (key) { - key.timestamp = new Date(key.timestamp).toUTCString(); + key.timestamp = new Date(key.timestamp).toISOString(); }); launcherActionItems .sort((a, b) => (a.timestamp > b.timestamp ? -1 : 1)) .map(function (key) { - key.timestamp = new Date(key.timestamp).toUTCString(); + key.timestamp = new Date(key.timestamp).toISOString(); }); groupAuditResponse.autoScalerActionItems = autoScalerActionItems; diff --git a/src/config.ts b/src/config.ts index 6bc1f20..b3628ea 100644 --- a/src/config.ts +++ b/src/config.ts @@ -42,6 +42,7 @@ const env = envalid.cleanEnv(process.env, { IDLE_TTL_SEC: envalid.num({ default: 300 }), // seconds, default to 5 minutes PROVISIONING_TTL_SEC: envalid.num({ default: 900 }), // seconds SHUTDOWN_TTL_SEC: envalid.num({ default: 86400 }), // default 1 day + RECONFIGURE_TTL_SEC: envalid.num({ default: 86400 }), // default 1 day SHUTDOWN_STATUS_TTL_SEC: envalid.num({ default: 600 }), // default 10 minutes AUDIT_TTL_SEC: envalid.num({ default: 172800 }), // default 2 day MAX_THROTTLE_THRESHOLD: envalid.num({ default: 40 }), // default max of 40 untracked per group to throttle scale up @@ -124,6 +125,7 @@ export default { IdleTTL: env.IDLE_TTL_SEC, ShutdownStatusTTL: env.SHUTDOWN_STATUS_TTL_SEC, ShutDownTTL: env.SHUTDOWN_TTL_SEC, + ReconfigureTTL: env.RECONFIGURE_TTL_SEC, AuditTTL: env.AUDIT_TTL_SEC, GroupRelatedDataTTL: env.GROUP_RELATED_DATA_TTL_SEC, // group processing lock diff --git a/src/group_report.ts b/src/group_report.ts index 696e2b9..48cef84 100644 --- a/src/group_report.ts +++ b/src/group_report.ts @@ -4,6 +4,7 @@ import { InstanceGroup } from './instance_group'; import { CloudInstance } from './cloud_manager'; import ShutdownManager from './shutdown_manager'; import MetricsLoop from './metrics_loop'; +import ReconfigureManager from './reconfigure_manager'; export interface InstanceReport { instanceId: string; @@ -13,6 +14,10 @@ export interface InstanceReport { cloudStatus?: string; isShuttingDown?: boolean; isScaleDownProtected?: boolean; + reconfigureScheduled?: string; + lastReconfigured?: string; + reconfigureError?: boolean; + shutdownError?: boolean; privateIp?: string; publicIp?: string; version?: string; @@ -29,6 +34,9 @@ export interface GroupReport { cloudCount?: number; unTrackedCount?: number; shuttingDownCount?: number; + shutdownErrorCount?: number; + reconfigureErrorCount?: number; + reconfigureScheduledCount?: number; scaleDownProtectedCount?: number; instances?: Array; } @@ -36,17 +44,20 @@ export interface GroupReport { export interface GroupReportGeneratorOptions { instanceTracker: InstanceTracker; shutdownManager: ShutdownManager; + reconfigureManager: ReconfigureManager; metricsLoop: MetricsLoop; } export default class GroupReportGenerator { private instanceTracker: InstanceTracker; private shutdownManager: ShutdownManager; + private reconfigureManager: ReconfigureManager; private metricsLoop: MetricsLoop; constructor(options: GroupReportGeneratorOptions) { this.instanceTracker = options.instanceTracker; this.shutdownManager = options.shutdownManager; + this.reconfigureManager = options.reconfigureManager; this.metricsLoop = options.metricsLoop; this.generateReport = this.generateReport.bind(this); @@ -76,6 +87,9 @@ export default class GroupReportGenerator { expiredCount: 0, unTrackedCount: 0, shuttingDownCount: 0, + shutdownErrorCount: 0, + reconfigureErrorCount: 0, + reconfigureScheduledCount: 0, scaleDownProtectedCount: 0, instances: [], }; @@ -94,6 +108,7 @@ export default class GroupReportGenerator { }); await this.addShutdownStatus(ctx, groupReport.instances); + await this.addReconfigureDate(ctx, groupReport.instances); await this.addShutdownProtectedStatus(ctx, groupReport.instances); groupReport.instances.forEach((instanceReport) => { @@ -106,6 +121,15 @@ export default class GroupReportGenerator { if (instanceReport.isScaleDownProtected) { groupReport.scaleDownProtectedCount++; } + if (instanceReport.reconfigureError) { + groupReport.reconfigureErrorCount++; + } + if (instanceReport.shutdownError) { + groupReport.shutdownErrorCount++; + } + if (instanceReport.reconfigureScheduled) { + groupReport.reconfigureScheduledCount++; + } if ( instanceReport.scaleStatus == 'unknown' && (instanceReport.cloudStatus === 'Provisioning' || instanceReport.cloudStatus === 'Running') @@ -155,6 +179,9 @@ export default class GroupReportGenerator { cloudStatus: 'unknown', version: 'unknown', isShuttingDown: instanceState.shutdownStatus, + lastReconfigured: instanceState.lastReconfigured, + reconfigureError: instanceState.reconfigureError, + shutdownError: instanceState.shutdownError, isScaleDownProtected: false, }; if (instanceState.shutdownStatus) { @@ -228,6 +255,19 @@ export default class GroupReportGenerator { return instanceReports; } + private async addReconfigureDate(ctx: Context, instanceReports: Array): Promise { + const reconfigureDates = await this.reconfigureManager.getReconfigureDates( + ctx, + instanceReports.map((instanceReport) => { + return instanceReport.instanceId; + }), + ); + + for (let i = 0; i < instanceReports.length; i++) { + instanceReports[i].reconfigureScheduled = reconfigureDates[i]; + } + } + private async addShutdownStatus(ctx: Context, instanceReports: Array): Promise { const shutdownStatuses = await this.shutdownManager.getShutdownStatuses( ctx, diff --git a/src/handlers.ts b/src/handlers.ts index eb35c52..8a83df7 100644 --- a/src/handlers.ts +++ b/src/handlers.ts @@ -4,13 +4,14 @@ import InstanceGroupManager, { InstanceGroup } from './instance_group'; import LockManager from './lock_manager'; import Redlock from 'redlock'; import ShutdownManager from './shutdown_manager'; +import ReconfigureManager from './reconfigure_manager'; import GroupReportGenerator from './group_report'; import Audit from './audit'; import ScalingManager from './scaling_options_manager'; interface SidecarResponse { shutdown: boolean; - reconfigure: boolean; + reconfigure: string; } interface InstanceGroupScalingActivitiesRequest { @@ -18,6 +19,7 @@ interface InstanceGroupScalingActivitiesRequest { enableLaunch?: boolean; enableScheduler?: boolean; enableUntrackedThrottle?: boolean; + enableReconfiguration?: boolean; } export interface InstanceGroupDesiredValuesRequest { @@ -81,6 +83,7 @@ interface HandlersOptions { instanceTracker: InstanceTracker; audit: Audit; shutdownManager: ShutdownManager; + reconfigureManager: ReconfigureManager; instanceGroupManager: InstanceGroupManager; groupReportGenerator: GroupReportGenerator; lockManager: LockManager; @@ -90,6 +93,7 @@ interface HandlersOptions { class Handlers { private instanceTracker: InstanceTracker; private shutdownManager: ShutdownManager; + private reconfigureManager: ReconfigureManager; private instanceGroupManager: InstanceGroupManager; private groupReportGenerator: GroupReportGenerator; private lockManager: LockManager; @@ -103,6 +107,7 @@ class Handlers { this.instanceTracker = options.instanceTracker; this.instanceGroupManager = options.instanceGroupManager; this.shutdownManager = options.shutdownManager; + this.reconfigureManager = options.reconfigureManager; this.groupReportGenerator = options.groupReportGenerator; this.audit = options.audit; this.scalingManager = options.scalingManager; @@ -110,11 +115,15 @@ class Handlers { async sidecarPoll(req: Request, res: Response): Promise { const details: InstanceDetails = req.body; - const shutdownStatus = await this.shutdownManager.getShutdownStatus(req.context, details.instanceId); - // TODO: implement reconfiguration checks - const reconfigureStatus = false; + const [shutdownStatus, reconfigureDate] = await Promise.all([ + this.shutdownManager.getShutdownStatus(req.context, details.instanceId), + this.reconfigureManager.getReconfigureDate(req.context, details.instanceId), + ]); - const sendResponse: SidecarResponse = { shutdown: shutdownStatus, reconfigure: reconfigureStatus }; + const sendResponse: SidecarResponse = { + shutdown: shutdownStatus, + reconfigure: reconfigureDate, + }; res.status(200); res.send(sendResponse); @@ -122,7 +131,13 @@ class Handlers { async sidecarStats(req: Request, res: Response): Promise { const report: StatsReport = req.body; - const shutdownStatus = await this.shutdownManager.getShutdownStatus(req.context, report.instance.instanceId); + const [shutdownStatus, reconfigureDate] = await Promise.all([ + this.shutdownManager.getShutdownStatus(req.context, report.instance.instanceId), + this.reconfigureManager.getReconfigureDate(req.context, report.instance.instanceId), + ]); + + await this.reconfigureManager.processInstanceReport(req.context, report, reconfigureDate); + await this.instanceTracker.stats(req.context, report, shutdownStatus); res.status(200); @@ -131,16 +146,24 @@ class Handlers { async sidecarStatus(req: Request, res: Response): Promise { const report: StatsReport = req.body; - const shutdownStatus = await this.shutdownManager.getShutdownStatus(req.context, report.instance.instanceId); + const [shutdownStatus, reconfigureDate] = await Promise.all([ + this.shutdownManager.getShutdownStatus(req.context, report.instance.instanceId), + this.reconfigureManager.getReconfigureDate(req.context, report.instance.instanceId), + ]); + + let postReconfigureDate = reconfigureDate; try { + postReconfigureDate = await this.reconfigureManager.processInstanceReport( + req.context, + report, + reconfigureDate, + ); await this.instanceTracker.stats(req.context, report, shutdownStatus); } catch (err) { req.context.logger.error('Status handling error', { err }); } - // TODO: implement reconfiguration checks - const reconfigureStatus = false; - const sendResponse: SidecarResponse = { shutdown: shutdownStatus, reconfigure: reconfigureStatus }; + const sendResponse: SidecarResponse = { shutdown: shutdownStatus, reconfigure: postReconfigureDate }; res.status(200); res.send(sendResponse); @@ -194,6 +217,9 @@ class Handlers { instanceGroup.enableUntrackedThrottle = scalingActivitiesRequest.enableUntrackedThrottle; } + if (scalingActivitiesRequest.enableReconfiguration != null) { + instanceGroup.enableReconfiguration = scalingActivitiesRequest.enableReconfiguration; + } await this.instanceGroupManager.upsertInstanceGroup(req.context, instanceGroup); res.status(200); res.send({ save: 'OK' }); @@ -205,6 +231,36 @@ class Handlers { } } + async reconfigureInstanceGroup(req: Request, res: Response): Promise { + + const instanceGroup = await this.instanceGroupManager.getInstanceGroup(req.params.name); + if (instanceGroup) { + if (instanceGroup.enableReconfiguration) { + // add audit item recording the request + await this.audit.updateLastReconfigureRequest(req.context, req.params.name); + // found the group, so find the instances and act upon them + // build the list of current instances + const currentInventory = await this.instanceTracker.trimCurrent(req.context, req.params.name); + const instances = this.instanceTracker.mapToInstanceDetails(currentInventory); + // set their reconfigure status to the current date + try { + await this.reconfigureManager.setReconfigureDate(req.context, instances); + res.status(200); + res.send({ save: 'OK', instances }); + } catch (err) { + req.context.logger.error('Error triggering instance reconfiguration', { err }); + res.status(500); + res.send({ save: false, error: 'Failed to trigger reconfiguration' }); + } + } else { + res.status(403); + res.send({ save: false, error: 'Reconfiguration disabled for group' }); + } + } else { + res.sendStatus(404); + } + } + async updateInstanceConfiguration(req: Request, res: Response): Promise { const instanceConfigurationUpdateRequest: InstanceConfigurationUpdateRequest = req.body; const lock: Redlock.Lock = await this.lockManager.lockGroup(req.context, req.params.name); diff --git a/src/instance_group.ts b/src/instance_group.ts index 5552851..7a1b576 100644 --- a/src/instance_group.ts +++ b/src/instance_group.ts @@ -26,6 +26,7 @@ export interface InstanceGroup { enableLaunch: boolean; enableScheduler: boolean; enableUntrackedThrottle: boolean; + enableReconfiguration?: boolean; gracePeriodTTLSec: number; protectedTTLSec: number; scalingOptions: ScalingOptions; diff --git a/src/instance_launcher.ts b/src/instance_launcher.ts index a396cf7..92e975f 100644 --- a/src/instance_launcher.ts +++ b/src/instance_launcher.ts @@ -360,7 +360,7 @@ export default class InstanceLauncher { instanceState.status.provisioning == true ); }); - return this.mapToInstanceDetails(states); + return this.instanceTracker.mapToInstanceDetails(states); } private getRunningInstances(instanceStates: Array): Array { @@ -372,7 +372,7 @@ export default class InstanceLauncher { instanceState.status.provisioning == false ); }); - return this.mapToInstanceDetails(states); + return this.instanceTracker.mapToInstanceDetails(states); } private getAvailableJibris(instanceStates: Array): Array { @@ -381,7 +381,7 @@ export default class InstanceLauncher { instanceState.status.jibriStatus && instanceState.status.jibriStatus.busyStatus == JibriStatusState.Idle ); }); - return this.mapToInstanceDetails(states); + return this.instanceTracker.mapToInstanceDetails(states); } private getExpiredJibris(instanceStates: Array): Array { @@ -391,7 +391,7 @@ export default class InstanceLauncher { instanceState.status.jibriStatus.busyStatus == JibriStatusState.Expired ); }); - return this.mapToInstanceDetails(states); + return this.instanceTracker.mapToInstanceDetails(states); } private getBusyJibris(instanceStates: Array): Array { @@ -400,16 +400,6 @@ export default class InstanceLauncher { instanceState.status.jibriStatus && instanceState.status.jibriStatus.busyStatus == JibriStatusState.Busy ); }); - return this.mapToInstanceDetails(states); - } - - private mapToInstanceDetails(states: Array): Array { - return states.map((response) => { - return { - instanceId: response.instanceId, - instanceType: response.instanceType, - group: response.metadata.group, - }; - }); + return this.instanceTracker.mapToInstanceDetails(states); } } diff --git a/src/instance_tracker.ts b/src/instance_tracker.ts index b8e4d8a..036af53 100644 --- a/src/instance_tracker.ts +++ b/src/instance_tracker.ts @@ -74,6 +74,7 @@ export interface StatsReport { shutdownError?: boolean; reconfigureError?: boolean; statsError?: boolean; + reconfigureComplete?: string; } export interface InstanceStatus { @@ -109,6 +110,7 @@ export interface InstanceState { reconfigureError?: boolean; shutdownError?: boolean; statsError?: boolean; + lastReconfigured?: string; } export interface InstanceTrackerOptions { @@ -151,7 +153,7 @@ export class InstanceTracker { } // @TODO: handle stats for instances - async stats(ctx: Context, report: StatsReport, shutdownStatus = false): Promise { + async stats(ctx: Context, report: StatsReport, shutdownStatus = false): Promise { ctx.logger.debug('Received report', { report }); const instanceState = { instanceId: report.instance.instanceId, @@ -166,6 +168,11 @@ export class InstanceTracker { reconfigureError: report.reconfigureError, statsError: report.statsError, }; + + if (report.reconfigureComplete) { + instanceState.lastReconfigured = report.reconfigureComplete; + } + if (isEmpty(report.stats) || report.statsError) { // empty stats report, this can happen either at provisioning when jibri is not yet up, or when the sidecar does not see a jibri ctx.logger.warn('Empty stats report, as it does not include jibri or jvb stats', { report }); @@ -203,8 +210,9 @@ export class InstanceTracker { return result == 1; } - async track(ctx: Context, state: InstanceState, shutdownStatus = false): Promise { + async track(ctx: Context, state: InstanceState, shutdownStatus = false): Promise { let group = 'default'; + // pull the group from metadata if provided if (state.metadata && state.metadata.group) { group = state.metadata.group; @@ -269,7 +277,7 @@ export class InstanceTracker { //monitor latest status await this.audit.saveLatestStatus(group, state.instanceId, state); - return true; + return; } async getSummaryMetricPerPeriod( @@ -543,4 +551,14 @@ export class InstanceTracker { } return states.filter((instanceState, index) => !statesShutdownStatus[index]); } + + mapToInstanceDetails(states: Array): Array { + return states.map((response) => { + return { + instanceId: response.instanceId, + instanceType: response.instanceType, + group: response.metadata.group, + }; + }); + } } diff --git a/src/reconfigure_manager.ts b/src/reconfigure_manager.ts new file mode 100644 index 0000000..da03642 --- /dev/null +++ b/src/reconfigure_manager.ts @@ -0,0 +1,84 @@ +import Redis from 'ioredis'; +import { Context } from './context'; +import Audit from './audit'; +import { InstanceDetails, StatsReport } from './instance_tracker'; + +export interface ReconfigureManagerOptions { + redisClient: Redis.Redis; + reconfigureTTL: number; + audit: Audit; +} + +export default class ReconfigureManager { + private redisClient: Redis.Redis; + private reconfigureTTL: number; + private audit: Audit; + + constructor(options: ReconfigureManagerOptions) { + this.redisClient = options.redisClient; + this.reconfigureTTL = options.reconfigureTTL; + this.audit = options.audit; + } + + reconfigureKey(instanceId: string): string { + return `instance:reconfigure:${instanceId}`; + } + + async setReconfigureDate(ctx: Context, instanceDetails: Array): Promise { + const reconfigureDate = new Date().toISOString(); + const pipeline = this.redisClient.pipeline(); + for (const instance of instanceDetails) { + const key = this.reconfigureKey(instance.instanceId); + ctx.logger.debug('Writing reconfigure date', { key, reconfigureDate }); + pipeline.set(key, reconfigureDate, 'ex', this.reconfigureTTL); + } + await pipeline.exec(); + await this.audit.saveReconfigureEvents(instanceDetails); + return true; + } + + async unsetReconfigureDate(ctx: Context, instanceId: string, group: string): Promise { + const key = this.reconfigureKey(instanceId); + const res = await this.redisClient.del(key); + ctx.logger.debug('Remove reconfigure value', { key, res }); + await this.audit.saveUnsetReconfigureEvents(instanceId, group); + return true; + } + + async getReconfigureDates(ctx: Context, instanceIds: Array): Promise { + const pipeline = this.redisClient.pipeline(); + instanceIds.forEach((instanceId) => { + const key = this.reconfigureKey(instanceId); + pipeline.get(key); + }); + const instances = await pipeline.exec(); + return instances.map((instance: any) => { + return instance[1]; + }); + } + + async getReconfigureDate(ctx: Context, instanceId: string): Promise { + const key = this.reconfigureKey(instanceId); + const res = await this.redisClient.get(key); + ctx.logger.debug('Read reconfigure value', { key, res }); + return res; + } + + async processInstanceReport(ctx: Context, report: StatsReport, reconfigureDate: string): Promise { + let returnReconfigureDate = reconfigureDate; + + if (reconfigureDate && report.reconfigureComplete) { + const dLast = new Date(report.reconfigureComplete); + const dValue = new Date(reconfigureDate); + if (dLast >= dValue) { + ctx.logger.debug('Reconfiguration found after scheduled date, unsetting reconfiguration', { + reconfigureDate, + reconfigureComplete: report.reconfigureComplete, + }); + await this.unsetReconfigureDate(ctx, report.instance.instanceId, report.instance.group); + returnReconfigureDate = ''; + } + } + return returnReconfigureDate; + } +}