Skip to content

Commit

Permalink
WIP: first pass at reconfiguration support (#118)
Browse files Browse the repository at this point in the history
* WIP: first pass at reconfiguration support

* fix audit event name
improve instance reports for homer-ts

* handle reconfiguration rest call

* tighter date formatting

* moved reconfiguration to separate class
changed reconfigure to return date string instead of boolean
changed function names to reflect date output of configuration

* cleanup init of objects

* enable reconfiguration flag on groups

* rename status to reconfigureDate, no longer receive as input

* remove lock, add try/catch for error case

* pipeline for audit instance update TTL commands
remove pipeline for individual set command

* audit reconfiguration requests, report latest
  • Loading branch information
aaronkvanmeerten authored Sep 27, 2021
1 parent fcb9e2d commit 3e3e117
Show file tree
Hide file tree
Showing 9 changed files with 352 additions and 91 deletions.
99 changes: 59 additions & 40 deletions src/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import InstanceLauncher from './instance_launcher';
import LockManager from './lock_manager';
import * as stats from './stats';
import ShutdownManager from './shutdown_manager';
import ReconfigureManager from './reconfigure_manager';
import JobManager from './job_manager';
import GroupReportGenerator from './group_report';
import Audit from './audit';
Expand Down Expand Up @@ -69,7 +70,7 @@ if (config.RedisDb) {
const redisClient = new Redis(redisOptions);

const audit = new Audit({
redisClient: redisClient,
redisClient,
redisScanCount: config.RedisScanCount,
auditTTL: config.AuditTTL,
groupRelatedDataTTL: config.GroupRelatedDataTTL,
Expand All @@ -78,14 +79,20 @@ const audit = new Audit({
const shutdownManager = new ShutdownManager({
redisClient,
shutdownTTL: config.ShutDownTTL,
audit: audit,
audit,
});

// Reconfiguration support: built from the shared Redis client, the configured
// reconfigure TTL and the audit recorder. This instance is handed to the group
// report generator and to the HTTP handlers further down in this file.
const reconfigureManager = new ReconfigureManager({
redisClient,
reconfigureTTL: config.ReconfigureTTL,
audit,
});

const instanceTracker = new InstanceTracker({
redisClient,
redisScanCount: config.RedisScanCount,
shutdownManager: shutdownManager,
audit: audit,
shutdownManager,
audit,
idleTTL: config.IdleTTL,
metricTTL: config.MetricTTL,
provisioningTTL: config.ProvisioningTTL,
Expand All @@ -94,29 +101,27 @@ const instanceTracker = new InstanceTracker({
});

const cloudManager = new CloudManager({
shutdownManager: shutdownManager,
shutdownManager,
isDryRun: config.DryRun,
ociConfigurationFilePath: config.OciConfigurationFilePath,
ociConfigurationProfile: config.OciConfigurationProfile,
digitalOceanAPIToken: config.DigitalOceanAPIToken,
digitalOceanConfigurationFilePath: config.DigitalOceanConfigurationFilePath,

instanceTracker: instanceTracker,
audit: audit,
instanceTracker,
audit,
cloudProviders: config.CloudProviders,

customConfigurationLaunchScriptPath: config.CustomConfigurationLaunchScriptPath,
customConfigurationLaunchScriptTimeoutMs: config.CustomConfigurationLaunchScriptTimeoutMs,
});

const lockManager: LockManager = new LockManager(logger, {
redisClient: redisClient,
redisClient,
jobCreationLockTTL: config.JobsCreationLockTTLMs,
groupLockTTLMs: config.GroupLockTTLMs,
});

const instanceGroupManager = new InstanceGroupManager({
redisClient: redisClient,
redisClient,
redisScanCount: config.RedisScanCount,
initialGroupList: config.GroupList,
groupJobsCreationGracePeriod: config.GroupJobsCreationGracePeriodSec,
Expand All @@ -135,12 +140,12 @@ instanceGroupManager.init(initCtx).catch((err) => {
});

const autoscaleProcessor = new AutoscaleProcessor({
instanceTracker: instanceTracker,
cloudManager: cloudManager,
instanceGroupManager: instanceGroupManager,
lockManager: lockManager,
instanceTracker,
cloudManager,
instanceGroupManager,
lockManager,
redisClient,
audit: audit,
audit,
});

const metricsLoop = new MetricsLoop({
Expand All @@ -153,45 +158,46 @@ const metricsLoop = new MetricsLoop({

const instanceLauncher = new InstanceLauncher({
maxThrottleThreshold: config.MaxThrottleThreshold,
instanceTracker: instanceTracker,
cloudManager: cloudManager,
instanceGroupManager: instanceGroupManager,
lockManager: lockManager,
instanceTracker,
cloudManager,
instanceGroupManager,
lockManager,
redisClient,
shutdownManager,
audit: audit,
audit,
metricsLoop,
});

const groupReportGenerator = new GroupReportGenerator({
instanceTracker: instanceTracker,
shutdownManager: shutdownManager,
metricsLoop: metricsLoop,
instanceTracker,
shutdownManager,
reconfigureManager,
metricsLoop,
});

const sanityLoop = new SanityLoop({
redisClient: redisClient,
redisClient,
metricsTTL: config.ServiceLevelMetricsTTL,
cloudManager: cloudManager,
cloudManager,
reportExtCallRetryStrategy: {
maxTimeInSeconds: config.ReportExtCallMaxTimeInSeconds,
maxDelayInSeconds: config.ReportExtCallMaxDelayInSeconds,
retryableStatusCodes: config.ReportExtCallRetryableStatusCodes,
},
groupReportGenerator: groupReportGenerator,
instanceGroupManager: instanceGroupManager,
groupReportGenerator,
instanceGroupManager,
});

// Each Queue in JobManager has its own Redis connection (other than the one in RedisClient)
// Bee-Queue also uses a different Redis library, so we map redisOptions to the object expected by Bee-Queue
const jobManager = new JobManager({
queueRedisOptions: redisQueueOptions,
lockManager: lockManager,
instanceGroupManager: instanceGroupManager,
instanceLauncher: instanceLauncher,
lockManager,
instanceGroupManager,
instanceLauncher,
autoscaler: autoscaleProcessor,
sanityLoop: sanityLoop,
metricsLoop: metricsLoop,
sanityLoop,
metricsLoop,
autoscalerProcessingTimeoutMs: config.GroupProcessingTimeoutMs,
launcherProcessingTimeoutMs: config.GroupProcessingTimeoutMs,
sanityLoopProcessingTimeoutMs: config.SanityProcessingTimoutMs,
Expand Down Expand Up @@ -252,13 +258,14 @@ async function pollForMetrics(metricsLoop: MetricsLoop) {
}

const h = new Handlers({
instanceTracker: instanceTracker,
instanceGroupManager: instanceGroupManager,
shutdownManager: shutdownManager,
groupReportGenerator: groupReportGenerator,
lockManager: lockManager,
audit: audit,
scalingManager: scalingManager,
instanceTracker,
instanceGroupManager,
shutdownManager,
reconfigureManager,
groupReportGenerator,
lockManager,
audit,
scalingManager,
});

const validator = new Validator({ instanceTracker, instanceGroupManager });
Expand Down Expand Up @@ -551,6 +558,18 @@ app.put(
},
);

// POST /groups/:name/actions/reconfigure-instances
// Answers 400 with the collected validation errors when validationResult(req)
// is non-empty; otherwise delegates to the reconfigure handler. Anything thrown
// along the way is forwarded to the next error middleware.
app.post('/groups/:name/actions/reconfigure-instances', async (req, res, next) => {
    try {
        const validation = validationResult(req);
        const hasValidationErrors = !validation.isEmpty();
        if (hasValidationErrors) {
            const payload = { errors: validation.array() };
            return res.status(400).json(payload);
        }
        await h.reconfigureInstanceGroup(req, res);
    } catch (err) {
        next(err);
    }
});

// Start the HTTP server on the configured port and log once it is accepting connections.
app.listen(config.HTTPServerPort, () => logger.info(`...listening on :${config.HTTPServerPort}`));
97 changes: 74 additions & 23 deletions src/audit.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ export interface LauncherActionItem {
/**
 * Group-level audit summary.
 *
 * The three `last*` fields are strings: each starts out as 'unknown' and is
 * replaced by a formatted timestamp when a group audit entry of the matching
 * type is found.
 */
export interface GroupAuditResponse {
// Formatted timestamp of the 'last-launcher-run' audit entry, or 'unknown'.
lastLauncherRun: string;
// Formatted timestamp of the 'last-autoScaler-run' audit entry, or 'unknown'.
lastAutoScalerRun: string;
// Formatted timestamp of the 'last-reconfigure-request' audit entry, or 'unknown'.
lastReconfigureRequest: string;
// Autoscaler action items for the group, sorted newest-first before being attached.
autoScalerActionItems?: AutoScalerActionItem[];
// Launcher action items for the group (presumably attached the same way as the autoscaler items; confirm in the builder).
launcherActionItems?: LauncherActionItem[];
}
Expand All @@ -46,6 +47,8 @@ export interface InstanceAuditResponse {
requestToLaunch: string;
latestStatus: string;
requestToTerminate: string;
requestToReconfigure: string;
reconfigureComplete: string;
latestStatusInfo?: InstanceState;
}

Expand Down Expand Up @@ -82,12 +85,23 @@ export default class Audit {
this.auditTTL,
);
if (latestStatusSaved) {
this.increaseLaunchEventExpiration(groupName, instanceId);
this.increaseShutdownEventExpiration(groupName, instanceId);
this.increaseInstanceExpirations(groupName, instanceId);
}
return latestStatusSaved;
}

/**
 * Resets the expiry of an instance's per-event audit keys to `auditTTL`.
 *
 * All EXPIRE commands are queued on a single pipeline and sent together.
 * Per-command results are not inspected, so a key that no longer exists is
 * not treated as a failure.
 *
 * @param groupName - group the instance belongs to
 * @param instanceId - instance whose audit keys are refreshed
 * @returns always `true` once the pipeline has executed
 */
async increaseInstanceExpirations(groupName: string, instanceId: string): Promise<boolean> {
    const keyPrefix = `audit:${groupName}:${instanceId}`;
    const eventSuffixes = [
        'request-to-launch',
        'request-to-terminate',
        'request-to-reconfigure',
        'reconfigure-complete',
    ];

    const batch = this.redisClient.pipeline();
    for (const suffix of eventSuffixes) {
        batch.expire(`${keyPrefix}:${suffix}`, this.auditTTL);
    }
    await batch.exec();

    return true;
}
async saveLaunchEvent(groupName: string, instanceId: string): Promise<boolean> {
const value: InstanceAudit = {
instanceId: instanceId,
Expand All @@ -97,15 +111,6 @@ export default class Audit {
return this.setInstanceValue(`audit:${groupName}:${instanceId}:request-to-launch`, value, this.auditTTL);
}

private async increaseLaunchEventExpiration(groupName: string, instanceId: string): Promise<boolean> {
// we don't care if this fails (e.g. perhaps the event no longer is there)
const result = await this.redisClient.expire(
`audit:${groupName}:${instanceId}:request-to-launch`,
this.auditTTL,
);
return result == 1;
}

async saveShutdownEvents(instanceDetails: Array<InstanceDetails>): Promise<void> {
const pipeline = this.redisClient.pipeline();
for (const instance of instanceDetails) {
Expand All @@ -124,13 +129,36 @@ export default class Audit {
await pipeline.exec();
}

private async increaseShutdownEventExpiration(groupName: string, instanceId: string): Promise<boolean> {
// we don't care if this fails (e.g. perhaps the event no longer is there)
const result = await this.redisClient.expire(
`audit:${groupName}:${instanceId}:request-to-terminate`,
async saveUnsetReconfigureEvents(instanceId: string, group: string): Promise<void> {
const value: InstanceAudit = {
instanceId: instanceId,
type: 'reconfigure-complete',
timestamp: Date.now(),
};
await this.redisClient.set(
`audit:${group}:${instanceId}:reconfigure-complete`,
JSON.stringify(value),
'ex',
this.auditTTL,
);
return result == 1;
}

/**
 * Writes one `request-to-reconfigure` audit record per instance.
 *
 * Each record is stored as JSON under
 * `audit:<group>:<instanceId>:request-to-reconfigure` with an expiry of
 * `auditTTL`; all writes are queued on one pipeline and executed together.
 *
 * @param instanceDetails - instances for which a reconfigure request is recorded
 */
async saveReconfigureEvents(instanceDetails: Array<InstanceDetails>): Promise<void> {
    const batch = this.redisClient.pipeline();

    instanceDetails.forEach(({ group, instanceId }) => {
        const auditEntry: InstanceAudit = {
            instanceId,
            type: 'request-to-reconfigure',
            timestamp: Date.now(),
        };
        const auditKey = `audit:${group}:${instanceId}:request-to-reconfigure`;
        batch.set(auditKey, JSON.stringify(auditEntry), 'ex', this.auditTTL);
    });

    await batch.exec();
}

async setInstanceValue(key: string, value: InstanceAudit, ttl: number): Promise<boolean> {
Expand Down Expand Up @@ -159,6 +187,17 @@ export default class Audit {
return true;
}

/**
 * Records a `last-reconfigure-request` audit entry for a group.
 *
 * @param ctx - request context; its logger receives the confirmation message
 * @param groupName - group for which the reconfigure request is recorded
 * @returns the value resolved by `setGroupValue` for this write
 */
async updateLastReconfigureRequest(ctx: Context, groupName: string): Promise<boolean> {
    const value: GroupAudit = {
        groupName: groupName,
        type: 'last-reconfigure-request',
    };
    // Wait for the write to settle before logging: previously the "Updated ..."
    // line was emitted while the promise was still pending, so it appeared in
    // the logs even when the write subsequently rejected.
    const updateResponse = await this.setGroupValue(groupName, value);
    ctx.logger.info(`Updated last reconfiguration request for group ${groupName}`);

    return updateResponse;
}

async updateLastLauncherRun(ctx: Context, groupName: string): Promise<boolean> {
const updateLastLaunchStart = process.hrtime();

Expand Down Expand Up @@ -257,6 +296,8 @@ export default class Audit {
requestToLaunch: 'unknown',
latestStatus: 'unknown',
requestToTerminate: 'unknown',
requestToReconfigure: 'unknown',
reconfigureComplete: 'unknown',
};
instanceAuditResponseList.push(instanceAuditResponse);
});
Expand All @@ -267,13 +308,19 @@ export default class Audit {
)) {
switch (instanceAudit.type) {
case 'request-to-launch':
instanceAuditResponse.requestToLaunch = new Date(instanceAudit.timestamp).toUTCString();
instanceAuditResponse.requestToLaunch = new Date(instanceAudit.timestamp).toISOString();
break;
case 'request-to-terminate':
instanceAuditResponse.requestToTerminate = new Date(instanceAudit.timestamp).toUTCString();
instanceAuditResponse.requestToTerminate = new Date(instanceAudit.timestamp).toISOString();
break;
case 'request-to-reconfigure':
instanceAuditResponse.requestToReconfigure = new Date(instanceAudit.timestamp).toISOString();
break;
case 'reconfigure-complete':
instanceAuditResponse.reconfigureComplete = new Date(instanceAudit.timestamp).toISOString();
break;
case 'latest-status':
instanceAuditResponse.latestStatus = new Date(instanceAudit.timestamp).toUTCString();
instanceAuditResponse.latestStatus = new Date(instanceAudit.timestamp).toISOString();
instanceAuditResponse.latestStatusInfo = instanceAudit.state;
break;
}
Expand All @@ -289,17 +336,21 @@ export default class Audit {
const groupAuditResponse: GroupAuditResponse = {
lastLauncherRun: 'unknown',
lastAutoScalerRun: 'unknown',
lastReconfigureRequest: 'unknown',
};

const autoScalerActionItems: AutoScalerActionItem[] = [];
const launcherActionItems: LauncherActionItem[] = [];
for (const groupAudit of groupAudits) {
switch (groupAudit.type) {
case 'last-launcher-run':
groupAuditResponse.lastLauncherRun = new Date(groupAudit.timestamp).toUTCString();
groupAuditResponse.lastLauncherRun = new Date(groupAudit.timestamp).toISOString();
break;
case 'last-autoScaler-run':
groupAuditResponse.lastAutoScalerRun = new Date(groupAudit.timestamp).toUTCString();
groupAuditResponse.lastAutoScalerRun = new Date(groupAudit.timestamp).toISOString();
break;
case 'last-reconfigure-request':
groupAuditResponse.lastReconfigureRequest = new Date(groupAudit.timestamp).toISOString();
break;
case 'launcher-action-item':
launcherActionItems.push(groupAudit.launcherActionItem);
Expand All @@ -312,12 +363,12 @@ export default class Audit {
autoScalerActionItems
.sort((a, b) => (a.timestamp > b.timestamp ? -1 : 1))
.map(function (key) {
key.timestamp = new Date(key.timestamp).toUTCString();
key.timestamp = new Date(key.timestamp).toISOString();
});
launcherActionItems
.sort((a, b) => (a.timestamp > b.timestamp ? -1 : 1))
.map(function (key) {
key.timestamp = new Date(key.timestamp).toUTCString();
key.timestamp = new Date(key.timestamp).toISOString();
});

groupAuditResponse.autoScalerActionItems = autoScalerActionItems;
Expand Down
Loading

0 comments on commit 3e3e117

Please sign in to comment.