Skip to content

Commit

Permalink
feat: sidecar shutdown complete reporting (#155)
Browse files Browse the repository at this point in the history
* feat: shutdown confirmation support

* further sidecar shutdown hook support

* fix handler

* handler updates

* audit support for sidecar shutdown

* include new scale status

* better log output
  • Loading branch information
aaronkvanmeerten committed Apr 19, 2024
1 parent 2b325b8 commit 3c840df
Show file tree
Hide file tree
Showing 8 changed files with 282 additions and 8 deletions.
9 changes: 9 additions & 0 deletions src/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ const h = new Handlers({
instanceTracker,
instanceGroupManager,
shutdownManager,
cloudManager,
reconfigureManager,
groupReportGenerator,
lockManager,
Expand Down Expand Up @@ -340,6 +341,14 @@ app.post('/sidecar/poll', async (req, res, next) => {
}
});

// Route for sidecar shutdown reports: delegates to the handler and passes any
// thrown error to `next` so downstream error handling can respond.
app.post('/sidecar/shutdown', async (req, res, next) => {
    try {
        await h.sidecarShutdown(req, res);
    } catch (error) {
        next(error);
    }
});

app.post('/sidecar/stats', async (req, res, next) => {
try {
await h.sidecarStats(req, res);
Expand Down
24 changes: 24 additions & 0 deletions src/audit.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ export interface InstanceAuditResponse {
requestToTerminate: string;
requestToReconfigure: string;
reconfigureComplete: string;
terminationConfirmation: string;
latestStatusInfo?: InstanceState;
}

Expand Down Expand Up @@ -96,6 +97,7 @@ export default class Audit {

pipeline.expire(`audit:${groupName}:${instanceId}:request-to-launch`, this.auditTTL);
pipeline.expire(`audit:${groupName}:${instanceId}:request-to-terminate`, this.auditTTL);
pipeline.expire(`audit:${groupName}:${instanceId}:confirmation-of-termination`, this.auditTTL);
pipeline.expire(`audit:${groupName}:${instanceId}:request-to-reconfigure`, this.auditTTL);
pipeline.expire(`audit:${groupName}:${instanceId}:reconfigure-complete`, this.auditTTL);

Expand Down Expand Up @@ -130,6 +132,24 @@ export default class Audit {
await pipeline.exec();
}

/**
 * Records a `confirmation-of-termination` audit entry for each given instance.
 *
 * Every write is queued on one pipeline and executed together. Each entry is
 * stored as JSON under `audit:<group>:<instanceId>:confirmation-of-termination`
 * with an expiry of `this.auditTTL`.
 *
 * @param instanceDetails instances whose shutdown confirmation should be audited
 */
async saveShutdownConfirmationEvents(instanceDetails: Array<InstanceDetails>): Promise<void> {
    const batch = this.redisClient.pipeline();
    instanceDetails.forEach(({ instanceId, group }) => {
        const auditEntry: InstanceAudit = {
            instanceId,
            type: 'confirmation-of-termination',
            timestamp: Date.now(),
        };
        const auditKey = `audit:${group}:${instanceId}:confirmation-of-termination`;
        batch.set(auditKey, JSON.stringify(auditEntry), 'EX', this.auditTTL);
    });
    await batch.exec();
}

async saveUnsetReconfigureEvents(instanceId: string, group: string): Promise<void> {
const value: InstanceAudit = {
instanceId: instanceId,
Expand Down Expand Up @@ -307,6 +327,7 @@ export default class Audit {
requestToTerminate: 'unknown',
requestToReconfigure: 'unknown',
reconfigureComplete: 'unknown',
terminationConfirmation: 'unknown',
};
instanceAuditResponseList.push(instanceAuditResponse);
});
Expand All @@ -322,6 +343,9 @@ export default class Audit {
case 'request-to-terminate':
instanceAuditResponse.requestToTerminate = new Date(instanceAudit.timestamp).toISOString();
break;
case 'confirmation-of-termination':
instanceAuditResponse.terminationConfirmation = new Date(instanceAudit.timestamp).toISOString();
break;
case 'request-to-reconfigure':
instanceAuditResponse.requestToReconfigure = new Date(instanceAudit.timestamp).toISOString();
break;
Expand Down
7 changes: 7 additions & 0 deletions src/cloud_manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,13 @@ export default class CloudManager {
return true;
}

/**
 * Handles a shutdown report for one instance: logs it, then stores a shutdown
 * confirmation for that instance through the shutdown manager.
 *
 * @param ctx request context providing the logger
 * @param instance details of the reported instance
 * @returns `true` once the confirmation call has completed; a failure in that
 *          call propagates as a rejected promise
 */
async shutdownInstance(ctx: Context, instance: InstanceDetails): Promise<boolean> {
    const { instanceId, group } = instance;
    ctx.logger.info(`[CloudManager] Shutting down instance ${instanceId} from group ${group}`);
    await this.shutdownManager.setShutdownConfirmation(ctx, [instance]);
    return true;
}

async getInstances(
ctx: Context,
group: InstanceGroup,
Expand Down
24 changes: 23 additions & 1 deletion src/group_report.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ export interface InstanceReport {
instanceName?: string;
scaleStatus?: string;
cloudStatus?: string;
shutdownComplete?: string | false;
isShuttingDown?: boolean;
isScaleDownProtected?: boolean;
reconfigureScheduled?: string;
Expand All @@ -33,6 +34,7 @@ export interface GroupReport {
expiredCount?: number;
cloudCount?: number;
unTrackedCount?: number;
shutdownCount?: number;
shuttingDownCount?: number;
shutdownErrorCount?: number;
reconfigureErrorCount?: number;
Expand Down Expand Up @@ -108,6 +110,7 @@ export default class GroupReportGenerator {
});

await this.addShutdownStatus(ctx, groupReport.instances);
await this.addShutdownConfirmations(ctx, groupReport.instances);
await this.addReconfigureDate(ctx, groupReport.instances);
await this.addShutdownProtectedStatus(ctx, groupReport.instances);

Expand All @@ -118,6 +121,9 @@ export default class GroupReportGenerator {
if (instanceReport.isShuttingDown) {
groupReport.shuttingDownCount++;
}
if (instanceReport.shutdownComplete) {
groupReport.shutdownCount++;
}
if (instanceReport.isScaleDownProtected) {
groupReport.scaleDownProtectedCount++;
}
Expand Down Expand Up @@ -186,12 +192,15 @@ export default class GroupReportGenerator {
cloudStatus: 'unknown',
version: 'unknown',
isShuttingDown: instanceState.shutdownStatus,
shutdownComplete: instanceState.shutdownComplete,
lastReconfigured: instanceState.lastReconfigured,
reconfigureError: instanceState.reconfigureError,
shutdownError: instanceState.shutdownError,
isScaleDownProtected: false,
};
if (instanceState.shutdownStatus) {
if (instanceState.shutdownComplete) {
instanceReport.scaleStatus = 'SHUTDOWN COMPLETE';
} else if (instanceState.shutdownStatus) {
instanceReport.scaleStatus = 'SHUTDOWN';
} else if (instanceState.status.provisioning) {
instanceReport.scaleStatus = 'PROVISIONING';
Expand Down Expand Up @@ -306,6 +315,19 @@ export default class GroupReportGenerator {
});
}

/**
 * Sets `shutdownComplete` on each report from the shutdown manager's stored
 * confirmations. Results are matched to reports by array position, and any
 * value already present on a report is overwritten.
 *
 * @param ctx request context passed through to the shutdown manager
 * @param instanceReports reports to update in place
 */
private async addShutdownConfirmations(ctx: Context, instanceReports: Array<InstanceReport>): Promise<void> {
    const instanceIds = instanceReports.map((report) => report.instanceId);
    const confirmations = await this.shutdownManager.getShutdownConfirmations(ctx, instanceIds);
    confirmations.forEach((confirmation, position) => {
        instanceReports[position].shutdownComplete = confirmation;
    });
}

private async addShutdownProtectedStatus(ctx: Context, instanceReports: Array<InstanceReport>): Promise<void> {
const instanceReportsProtectedStatus: boolean[] = await this.shutdownManager.areScaleDownProtected(
ctx,
Expand Down
25 changes: 25 additions & 0 deletions src/handlers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import GroupReportGenerator from './group_report';
import Audit from './audit';
import ScalingManager from './scaling_options_manager';
import * as promClient from 'prom-client';
import CloudManager from './cloud_manager';

const statsErrors = new promClient.Counter({
name: 'autoscaler_stats_errors',
Expand Down Expand Up @@ -92,6 +93,7 @@ interface InstanceConfigurationUpdateRequest {
}

interface HandlersOptions {
cloudManager: CloudManager;
instanceTracker: InstanceTracker;
audit: Audit;
shutdownManager: ShutdownManager;
Expand All @@ -103,6 +105,7 @@ interface HandlersOptions {
}

class Handlers {
private cloudManager: CloudManager;
private instanceTracker: InstanceTracker;
private shutdownManager: ShutdownManager;
private reconfigureManager: ReconfigureManager;
Expand All @@ -116,6 +119,7 @@ class Handlers {
this.sidecarPoll = this.sidecarPoll.bind(this);

this.lockManager = options.lockManager;
this.cloudManager = options.cloudManager;
this.instanceTracker = options.instanceTracker;
this.instanceGroupManager = options.instanceGroupManager;
this.shutdownManager = options.shutdownManager;
Expand Down Expand Up @@ -150,6 +154,27 @@ class Handlers {
}
}

/**
 * Handles a sidecar's shutdown confirmation request.
 *
 * The request body is treated as the reporting instance's details and handed
 * to the cloud manager, which records the confirmation.
 *
 * Responses:
 *  - 200 `{ save: 'OK' }` when the confirmation was recorded
 *  - 400 `{ save: 'ERROR' }` when the body carries no usable `instanceId`
 *  - 500 `{ save: 'ERROR' }` when recording the confirmation throws
 *
 * @param req incoming request; `req.body` is expected to be an `InstanceDetails` object
 * @param res response used to send the status and JSON payload
 */
async sidecarShutdown(req: Request, res: Response): Promise<void> {
    const details: InstanceDetails = req.body;
    req.context.logger.info('Received shutdown confirmation', { details });
    statsCounter.inc();

    // The body is caller-controlled. Without an instanceId the confirmation would be
    // stored under a key built from `undefined`, so reject the request up front.
    if (!details || typeof details.instanceId !== 'string' || details.instanceId.length === 0) {
        req.context.logger.error('Shutdown handling error', { err: 'missing or invalid instanceId' });
        statsErrors.inc();

        res.status(400);
        res.send({ save: 'ERROR' });
        return;
    }

    try {
        await this.cloudManager.shutdownInstance(req.context, details);

        const sendResponse = {
            save: 'OK',
        };

        res.status(200);
        res.send(sendResponse);
    } catch (err) {
        req.context.logger.error('Shutdown handling error', { err });
        statsErrors.inc();

        res.status(500);
        res.send({ save: 'ERROR' });
    }
}
async sidecarStats(req: Request, res: Response): Promise<void> {
const report: StatsReport = req.body;
statsCounter.inc();
Expand Down
15 changes: 8 additions & 7 deletions src/instance_tracker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ export interface InstanceState {
timestamp?: number;
metadata: InstanceMetadata;
shutdownStatus?: boolean;
shutdownComplete?: string;
reconfigureError?: boolean;
shutdownError?: boolean;
statsError?: boolean;
Expand Down Expand Up @@ -672,18 +673,18 @@ export class InstanceTracker {
}

/**
 * Returns the states of instances that are neither shutting down nor confirmed
 * as shut down.
 *
 * An instance is dropped when any of the following holds:
 *  - its own state reports shutdown (`shutdownStatusFromState`),
 *  - the shutdown manager has a shutdown status stored for it,
 *  - the shutdown manager has a shutdown confirmation stored for it.
 *
 * Lookups from the shutdown manager are matched to `states` by array position.
 *
 * @param ctx request context passed through to the shutdown manager
 * @param states instance states to filter
 * @returns the subset of `states` still considered active
 */
async filterOutInstancesShuttingDown(ctx: Context, states: Array<InstanceState>): Promise<Array<InstanceState>> {
    const instanceIds = states.map((state) => state.instanceId);
    const shutdownStatuses = await this.shutdownManager.getShutdownStatuses(ctx, instanceIds);
    const shutdownConfirmations = await this.shutdownManager.getShutdownConfirmations(ctx, instanceIds);

    return states.filter((state, index) => {
        const shuttingDown = this.shutdownStatusFromState(state) || shutdownStatuses[index];
        // A missing confirmation entry is falsy, so the instance is kept in that case.
        return !shuttingDown && !shutdownConfirmations[index];
    });
}

mapToInstanceDetails(states: Array<InstanceState>): Array<InstanceDetails> {
Expand Down
51 changes: 51 additions & 0 deletions src/shutdown_manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ export default class ShutdownManager {
return `instance:shutdown:${instanceId}`;
}

/**
 * Builds the `instance:shutdownConfirmed:<instanceId>` key used to store an
 * instance's shutdown confirmation.
 */
shutDownConfirmedKey(instanceId: string): string {
    const confirmationKey = `instance:shutdownConfirmed:${instanceId}`;
    return confirmationKey;
}

/** Builds the `instance:scaleDownProtected:<instanceId>` key for the given instance. */
protectedKey(instanceId: string): string {
    const scaleDownProtectedKey = `instance:scaleDownProtected:${instanceId}`;
    return scaleDownProtectedKey;
}
Expand Down Expand Up @@ -61,13 +65,60 @@ export default class ShutdownManager {
}
}

/**
 * Reads the stored shutdown confirmation for each instance id using a single pipeline.
 *
 * @param ctx request context providing the logger
 * @param instanceIds ids to look up
 * @returns one entry per id, in input order: the stored confirmation string, or
 *          `false` when no value is stored. If the pipeline yields no result at
 *          all, an error is logged and an empty array is returned.
 */
async getShutdownConfirmations(ctx: Context, instanceIds: Array<string>): Promise<(string | false)[]> {
    const batch = this.redisClient.pipeline();
    for (const instanceId of instanceIds) {
        batch.get(this.shutDownConfirmedKey(instanceId));
    }

    const replies = await batch.exec();
    if (!replies) {
        ctx.logger.error('ShutdownConfirmations Failed in pipeline.exec()');
        return [];
    }

    // Each reply is an [error, value] pair; a null/undefined value maps to `false`.
    return replies.map(([, value]: [error: Error | null, result: unknown]) =>
        value == null ? false : (value as string),
    );
}

/**
 * Reports whether the stored shutdown status for an instance is `'shutdown'`.
 *
 * @param ctx request context providing the logger
 * @param instanceId id of the instance to check
 * @returns `true` only when the value stored under the instance's shutdown key equals `'shutdown'`
 */
async getShutdownStatus(ctx: Context, instanceId: string): Promise<boolean> {
    const statusKey = this.shutDownKey(instanceId);
    const storedValue = await this.redisClient.get(statusKey);
    ctx.logger.debug('Read shutdown status', { key: statusKey, res: storedValue });
    return storedValue === 'shutdown';
}

/**
 * Reads the stored shutdown confirmation for a single instance.
 *
 * @param ctx request context providing the logger
 * @param instanceId id of the instance to look up
 * @returns the stored value when it is truthy, otherwise `false`
 */
async getShutdownConfirmation(ctx: Context, instanceId: string): Promise<false | string> {
    const confirmationKey = this.shutDownConfirmedKey(instanceId);
    const stored = await this.redisClient.get(confirmationKey);
    ctx.logger.debug('Read shutdown confirmation', { key: confirmationKey, res: stored });
    return stored ? stored : false;
}

/**
 * Stores a shutdown confirmation for each given instance and then records the
 * matching audit events.
 *
 * The confirmation values are queued on one pipeline, each with an expiry of
 * `this.shutdownTTL`, and executed together before the audit call is made.
 *
 * @param ctx request context providing the logger
 * @param instanceDetails instances to mark as confirmed shut down
 * @param status value to store; defaults to the current time as an ISO-8601 string
 * @returns `true` once both the pipeline and the audit call have completed
 */
async setShutdownConfirmation(
    ctx: Context,
    instanceDetails: Array<InstanceDetails>,
    status = new Date().toISOString(),
): Promise<boolean> {
    const batch = this.redisClient.pipeline();
    instanceDetails.forEach(({ instanceId }) => {
        const confirmationKey = this.shutDownConfirmedKey(instanceId);
        ctx.logger.debug('Writing shutdown confirmation', { key: confirmationKey, status });
        batch.set(confirmationKey, status, 'EX', this.shutdownTTL);
    });
    await batch.exec();
    await this.audit.saveShutdownConfirmationEvents(instanceDetails);
    return true;
}

async setScaleDownProtected(
ctx: Context,
instanceId: string,
Expand Down
Loading

0 comments on commit 3c840df

Please sign in to comment.