Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update(app-folder):kf1.9 #228

Merged
merged 2 commits into from
Dec 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions components/centraldashboard/app/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {readFile} from 'fs/promises';
import {resolve} from 'path';

export const ERRORS = {
no_metrics_service_configured: 'No metrics service configured',
operation_not_supported: 'Operation not supported',
invalid_links_config: 'Cannot load dashboard menu link',
invalid_settings: 'Cannot load dashboard settings',
Expand Down Expand Up @@ -39,6 +40,15 @@ export class Api {
*/
routes(): Router {
return Router()
.get('/metrics', async (req: Request, res: Response) => {
if (!this.metricsService) {
return apiError({
res, code: 405,
error: ERRORS.operation_not_supported,
});
}
res.json(this.metricsService.getChartsLink());
})
.get(
'/metrics/:type((node|podcpu|podmem))',
async (req: Request, res: Response) => {
Expand All @@ -50,8 +60,10 @@ export class Api {
}

let interval = Interval.Last15m;
if (Interval[req.query.interval] !== undefined) {
interval = Number(Interval[req.query.interval]);
const intervalQuery = req.query.interval as string;
const intervalQueryKey = intervalQuery as keyof typeof Interval;
if (Interval[intervalQueryKey] !== undefined) {
interval = Interval[intervalQueryKey];
}
switch (req.params.type) {
case 'node':
Expand Down
30 changes: 25 additions & 5 deletions components/centraldashboard/app/api_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,22 @@ describe('Main API', () => {
port = addressInfo.port;
});

it('Should return a 405 status code', (done) => {
get(`http://localhost:${port}/api/metrics/podcpu`, (res) => {
expect(res.statusCode).toBe(405);
done();
it('Should return a 405 status code', async () => {
const metricsEndpoint = new Promise((resolve) => {
get(`http://localhost:${port}/api/metrics`, (res) => {
expect(res.statusCode).toBe(405);
resolve();
});
});

const metricsTypeEndpoint = new Promise((resolve) => {
get(`http://localhost:${port}/api/metrics/podcpu`, (res) => {
expect(res.statusCode).toBe(405);
resolve();
});
});

await Promise.all([metricsEndpoint, metricsTypeEndpoint]);
});
});

Expand All @@ -47,7 +58,7 @@ describe('Main API', () => {
mockK8sService = jasmine.createSpyObj<KubernetesService>(['']);
mockProfilesService = jasmine.createSpyObj<DefaultApi>(['']);
mockMetricsService = jasmine.createSpyObj<MetricsService>([
'getNodeCpuUtilization', 'getPodCpuUtilization', 'getPodMemoryUsage'
'getNodeCpuUtilization', 'getPodCpuUtilization', 'getPodMemoryUsage', 'getChartsLink'
]);

testApp = express();
Expand All @@ -64,6 +75,15 @@ describe('Main API', () => {
}
});

// GET /api/metrics must answer 200 and obtain its payload from
// MetricsService.getChartsLink().
it('Should retrieve charts link in Metrics service', (done) => {
  const metricsUrl = `http://localhost:${port}/api/metrics`;
  get(metricsUrl, (response) => {
    const {statusCode} = response;
    expect(statusCode).toBe(200);
    expect(mockMetricsService.getChartsLink).toHaveBeenCalled();
    done();
  });
});

it('Should retrieve Node CPU Utilization for default 15m interval',
async () => {
const defaultInterval = new Promise((resolve) => {
Expand Down
14 changes: 8 additions & 6 deletions components/centraldashboard/app/api_workgroup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ import {
ERRORS,
} from './api';

// From: https://www.w3resource.com/javascript/form/email-validation.php
const EMAIL_RGX = /^\w+([\.-]?\w+)*@\w+([\.-]?\w+)*(\.\w{2,3})+$/;
// From: https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address
const EMAIL_RGX = /^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/;

// Valid actions for handling a contributor
type ContributorActions = 'create' | 'remove';
Expand Down Expand Up @@ -39,14 +39,16 @@ interface EnvironmentInfo {
isClusterAdmin: boolean;
}

export type SimpleRole = 'owner'| 'contributor';
mathis-marcotte marked this conversation as resolved.
Show resolved Hide resolved
export type WorkgroupRole = 'admin' | 'edit';
export type SimpleRole = 'owner' | 'contributor' | 'viewer';
export type WorkgroupRole = 'admin' | 'edit' | 'view';
export type Role = SimpleRole | WorkgroupRole;
/**
 * Role pairs that translate into each other. Every pair is registered in
 * both directions when `roleMap` is built.
 */
const ROLE_PAIRS: Array<[Role, Role]> = [
  ['admin', 'owner'],
  ['edit', 'contributor'],
  ['view', 'viewer'],
];

// Expand each pair into its forward and reverse entry, keeping pair order.
const roleEntries: Array<[Role, Role]> = [];
for (const [first, second] of ROLE_PAIRS) {
  roleEntries.push([first, second], [second, first]);
}

/** Bidirectional lookup between the paired role names. */
export const roleMap: ReadonlyMap<Role, Role> = new Map(roleEntries);

export interface SimpleBinding {
Expand Down Expand Up @@ -250,8 +252,8 @@ export class WorkgroupApi {
res.json(users);
} catch (err) {
const errMessage = [
`Unable to add new contributor for ${namespace}: ${err.stack || err}`,
`Unable to fetch contributors for ${namespace}: ${err.stack || err}`,
`Unable to add new contributor for ${namespace}. HTTP ${err.response.statusCode || '???'} - ${err.response.statusMessage || 'Unknown'}`,
`Unable to fetch contributors for ${namespace}. HTTP ${err.response.statusCode || '???'} - ${err.response.statusMessage || 'Unknown'}`,
][errIndex];
surfaceProfileControllerErrors({
res,
Expand Down
4 changes: 2 additions & 2 deletions components/centraldashboard/app/k8s_service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ const SHARES_ERRORS_CM_NAME = 'shares-errors';

/** Wrap Kubernetes API calls in a simpler interface for use in routes. */
export class KubernetesService {
private namespace = 'kubeflow';
private namespace = process.env.POD_NAMESPACE || 'kubeflow';
private coreAPI: k8s.CoreV1Api;
private customObjectsAPI: k8s.CustomObjectsApi;
private dashboardConfigMap = DASHBOARD_CONFIGMAP;
Expand Down Expand Up @@ -275,7 +275,7 @@ export class KubernetesService {
}

/** Retrieves the list of events for the given Namespace from the Cluster. */
async getEventsForNamespace(namespace: string): Promise<k8s.V1Event[]> {
async getEventsForNamespace(namespace: string): Promise<k8s.CoreV1Event[]> {
try {
const {body} = await this.coreAPI.listNamespacedEvent(namespace);
return body.items;
Expand Down
2 changes: 1 addition & 1 deletion components/centraldashboard/app/k8s_service_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ describe('KubernetesService', () => {
]
} as unknown; // needed to work around TS compiler
mockApiClient.listNamespacedEvent.and.returnValue(Promise.resolve(
{response: mockResponse, body: response as k8s.V1EventList}));
{response: mockResponse, body: response as k8s.CoreV1EventList}));

const events = await k8sService.getEventsForNamespace('kubeflow');
const eventNames = events.map((n) => n.metadata.name);
Expand Down
21 changes: 16 additions & 5 deletions components/centraldashboard/app/metrics_service.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
/** Time-series interval enumeration. */
export enum Interval {
Last5m,
Last15m,
Last30m,
Last60m,
Last180m
Last5m = 'Last5m',
Last15m = 'Last15m',
Last30m = 'Last30m',
Last60m = 'Last60m',
Last180m = 'Last180m',
}

/** Data-point contained in a time series. */
Expand All @@ -14,6 +14,11 @@ export interface TimeSeriesPoint {
value: number;
}

/**
* Link to a metrics dashboard together with the label to show for it, as
* returned by `MetricsService.getChartsLink()`.
*/
export interface MetricsInfo {
/** URL of the metrics dashboard; may be `undefined` when no URL is available. */
resourceChartsLink: string | undefined;
/** Text to display for the button that redirects to the dashboard. */
resourceChartsLinkText: string;
}

/**
* Interface definition for implementers of metrics services capable of
* returning time-series resource utilization metrics for the Kubeflow system.
Expand All @@ -39,4 +44,10 @@ export interface MetricsService {
* @param interval
*/
getPodMemoryUsage(interval: Interval): Promise<TimeSeriesPoint[]>;

/**
* Returns a MetricsInfo object containing the URL of the metrics dashboard and
* the text to display for the redirect button.
*
* @returns the dashboard link and the label for its redirect button.
*/
getChartsLink(): MetricsInfo;
}
90 changes: 90 additions & 0 deletions components/centraldashboard/app/prometheus_metrics_service.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import {Interval, MetricsInfo, MetricsService, TimeSeriesPoint} from "./metrics_service";
import {PrometheusDriver, RangeVector, ResponseType} from 'prometheus-query';

/**
 * MetricsService implementation that reads resource-utilization time series
 * from a Prometheus server through a `prometheus-query` driver.
 */
export class PrometheusMetricsService implements MetricsService {
  /**
   * Look-back window, in minutes, for every supported interval. Typing the
   * table as `Record<Interval, number>` makes the compiler reject an
   * `Interval` member that has no window assigned.
   */
  private static readonly WINDOW_MINUTES: Record<Interval, number> = {
    [Interval.Last5m]: 5,
    [Interval.Last15m]: 15,
    [Interval.Last30m]: 30,
    [Interval.Last60m]: 60,
    [Interval.Last180m]: 180,
  };

  /**
   * Value passed as the `step` argument of every range query.
   * NOTE(review): presumably seconds - confirm against prometheus-query docs.
   */
  private static readonly QUERY_STEP = 10;

  private readonly prometheusDriver: PrometheusDriver;
  private readonly dashboardUrl: string | undefined;

  /**
   * @param prometheusDriver driver used to run range queries.
   * @param dashboardUrl URL reported by `getChartsLink()`; may be `undefined`.
   */
  constructor(prometheusDriver: PrometheusDriver, dashboardUrl: string | undefined) {
    this.prometheusDriver = prometheusDriver;
    this.dashboardUrl = dashboardUrl;
  }

  /**
   * Queries the 5-minute rate of `node_cpu_seconds_total`, summed by
   * `instance`, over the requested interval.
   */
  async getNodeCpuUtilization(interval: Interval): Promise<TimeSeriesPoint[]> {
    const query = `sum(rate(node_cpu_seconds_total[5m])) by (instance)`;
    const series = await this.queryPrometheus(query, this.getCorrespondingTime(interval));
    return this.convertToTimeSeriesPoints(series);
  }

  /**
   * Queries the summed 5-minute rate of `container_cpu_usage_seconds_total`
   * over the requested interval.
   */
  async getPodCpuUtilization(interval: Interval): Promise<TimeSeriesPoint[]> {
    const query = `sum(rate(container_cpu_usage_seconds_total[5m]))`;
    const series = await this.queryPrometheus(query, this.getCorrespondingTime(interval));
    return this.convertToTimeSeriesPoints(series);
  }

  /**
   * Queries the sum of `container_memory_usage_bytes` over the requested
   * interval.
   */
  async getPodMemoryUsage(interval: Interval): Promise<TimeSeriesPoint[]> {
    const query = `sum(container_memory_usage_bytes)`;
    const series = await this.queryPrometheus(query, this.getCorrespondingTime(interval));
    return this.convertToTimeSeriesPoints(series);
  }

  /**
   * Runs a range query between `start` and `end` (milliseconds since epoch,
   * as produced by `Date.now()`).
   *
   * @returns the resulting range vectors, or an empty array (after logging a
   *     warning) when the server answers with a non-matrix result type.
   */
  private async queryPrometheus(query: string, start: number, end: number = Date.now()): Promise<RangeVector[]> {
    const result = await this.prometheusDriver.rangeQuery(
        query, start, end, PrometheusMetricsService.QUERY_STEP);
    if (result.resultType !== ResponseType.MATRIX) {
      console.warn(`The prometheus server returned invalid result type: ${result.resultType}`);
      return [];
    }
    return result.result as RangeVector[];
  }

  /**
   * Converts an interval into the start timestamp (milliseconds since epoch)
   * of its look-back window, measured back from the current time.
   *
   * A value that is not a known interval logs a warning and yields the
   * current time, i.e. a zero-length window.
   */
  private getCorrespondingTime(interval: Interval): number {
    const windows = PrometheusMetricsService.WINDOW_MINUTES;
    // Own-property check so that a value forced through a cast can never pick
    // up an inherited Object.prototype member.
    const isKnown = Object.prototype.hasOwnProperty.call(windows, interval);
    if (!isKnown) {
      console.warn('unknown interval.');
    }
    const minutes = isKnown ? windows[interval] : 0;
    return Date.now() - minutes * 60 * 1000;
  }

  /**
   * Flattens range vectors into one point per sample. Each point is labelled
   * with its series' labels rendered as comma-separated `key=value` pairs.
   */
  private convertToTimeSeriesPoints(series: RangeVector[]): TimeSeriesPoint[] {
    const timeSeriesPoints: TimeSeriesPoint[] = [];
    for (const serie of series) {
      const label = Object.entries(serie.metric.labels)
          .map(([key, value]) => `${key}=${value}`)
          .join(',');

      // The `public/components/resource-chart.js` is multiplying the
      // timestamp by 1000 and the value by 100, so both are scaled down here.
      for (const sample of serie.values) {
        timeSeriesPoints.push({
          timestamp: sample.time.getTime() / 1000,
          label,
          value: sample.value / 100,
        });
      }
    }
    return timeSeriesPoints;
  }

  /**
   * Returns the dashboard URL given to the constructor together with the
   * fixed label for the redirect button.
   */
  getChartsLink(): MetricsInfo {
    return {
      resourceChartsLink: this.dashboardUrl,
      resourceChartsLinkText: 'View in dashboard',
    };
  }
}
Loading
Loading