Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: The disabled failed probe is not retrying when it encounters an incident after enabling it #1242

Merged
Original file line number Diff line number Diff line change
Expand Up @@ -23,79 +23,20 @@
**********************************************************************************/

import { expect } from '@oclif/test'
import { getDowntimeDuration, addIncident, removeIncident } from '.'
import { getContext } from '../../context'

describe('Downtime counter', () => {
it('should start counter', () => {
// arrange
const probeConfig = {
alert: { id: 'PHbCL', assertion: '', message: '' },
probeID: 'APDpe',
url: 'https://example.com',
}

// act
addIncident(probeConfig)

// assert
expect(getDowntimeDuration(probeConfig)).not.eq('0 seconds')
})

it('should stop counter', () => {
// arrange
const probeConfig = {
alert: { id: 'VyYwG', assertion: '', message: '' },
probeID: 'P1n9x',
url: 'https://example.com',
}

// act
addIncident(probeConfig)
removeIncident(probeConfig)

// assert
expect(getDowntimeDuration(probeConfig)).eq('0 seconds')
})

it('should stop inexistent counter', () => {
// arrange
const probeConfig = {
alert: { id: 'knUA4', assertion: '', message: '' },
probeID: 'P1n9x',
url: 'https://example.com',
}

// act
removeIncident(probeConfig)

// assert
expect(getDowntimeDuration(probeConfig)).eq('0 seconds')
})

it('should return 0 seconds if not started yet', () => {
// arrange
const probeConfig = {
alert: { id: '9c92j', assertion: '', message: '' },
probeID: 'rwrs8',
url: 'https://example.com',
}

// assert
expect(getDowntimeDuration(probeConfig)).eq('0 seconds')
})
import { addIncident, removeIncident, getIncidents } from '.'

describe('Incident', () => {
it('allow identical urls but different probeID', () => {
// arrange
const probeConfig = {
alert: { id: 'VyYwG', assertion: '', message: '' },
probeID: 'Pn9x',
url: 'https://example.com',
probeRequestURL: 'https://example.com',
}
const probeConfig2 = {
alert: { id: 'VyYwG', assertion: '', message: '' },
probeID: 'Pn9x-2',
url: 'https://example.com',
probeRequestURL: 'https://example.com',
}

// act
Expand All @@ -104,20 +45,20 @@ describe('Downtime counter', () => {
removeIncident(probeConfig)

// assert
expect(getContext().incidents[0].probeID).eq(probeConfig2.probeID)
expect(getIncidents()[0].probeID).eq(probeConfig2.probeID)
})

it('allow identical probe-ids but different urls', () => {
// arrange
const probeConfig = {
alert: { id: 'VyYwG', assertion: '', message: '' },
probeID: 'Pn9x',
url: 'https://example.com',
probeRequestURL: 'https://example.com',
}
const probeConfig2 = {
alert: { id: 'VyYwG', assertion: '', message: '' },
probeID: 'Pn9x',
url: 'https://sub.example.com',
probeRequestURL: 'https://sub.example.com',
}

// act
Expand All @@ -126,6 +67,6 @@ describe('Downtime counter', () => {
removeIncident(probeConfig)

// assert
expect(getContext().incidents[0].probeRequestURL).eq(probeConfig2.url)
expect(getIncidents()[0].probeRequestURL).eq(probeConfig2.probeRequestURL)
})
})
Original file line number Diff line number Diff line change
Expand Up @@ -22,57 +22,43 @@
* SOFTWARE. *
**********************************************************************************/

import { formatDistanceToNow } from 'date-fns'
import { getContext, setContext } from '../../context'
import type { ProbeAlert } from '../../interfaces/probe'
import { type Incident, getContext, setContext } from '../../context'

type DowntimeCounter = {
alert: ProbeAlert
probeID: string
url: string
createdAt?: Date
export function getIncidents() {
return getContext().incidents
}

export function addIncident({
alert,
createdAt,
probeID,
url,
}: DowntimeCounter): void {
export function findIncident(probeId: string) {
return getIncidents().find(({ probeID }) => probeID === probeId)
}

export function addIncident(
incident: Omit<Incident, 'createdAt'> & Partial<Pick<Incident, 'createdAt'>>
): void {
const newIncident = {
alert,
probeID,
probeRequestURL: url,
createdAt: createdAt || new Date(),
...incident,
createdAt: incident?.createdAt || new Date(),
}

setContext({ incidents: [...getContext().incidents, newIncident] })
setContext({ incidents: [...getIncidents(), newIncident] })
}

export function getDowntimeDuration({
export function removeIncident({
probeID,
url,
}: Omit<DowntimeCounter, 'alert'>): string {
const lastIncident = getContext().incidents.find(
(incident) =>
incident.probeID === probeID && incident.probeRequestURL === url
)

if (!lastIncident) {
return '0 seconds'
}
probeRequestURL,
}: Pick<Incident, 'probeID'> &
Partial<Pick<Incident, 'probeRequestURL'>>): void {
const newIncidents = getIncidents().filter((incident) => {
if (!probeRequestURL) {
return probeID !== incident.probeID
}

return formatDistanceToNow(lastIncident.createdAt, {
includeSeconds: true,
})
}

export function removeIncident({ probeID, url }: DowntimeCounter): void {
const newIncidents = getContext().incidents.filter(
(incident) =>
incident.probeID !== probeID || incident.probeRequestURL !== url
return (
probeID !== incident.probeID ||
probeRequestURL !== incident.probeRequestURL
)
// remove incidents with exact mach of probeID and url
)
})

setContext({ incidents: newIncidents })
}
14 changes: 8 additions & 6 deletions src/components/notification/alert-message.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

import { hostname, platform } from 'os'
import { promisify } from 'util'
import { format } from 'date-fns'
import { format, formatDistanceToNow } from 'date-fns'
import * as Handlebars from 'handlebars'
import getos from 'getos'
import osName from 'os-name'
Expand All @@ -33,7 +33,7 @@ import type { NotificationMessage } from '@hyperjumptech/monika-notification'
import { ProbeRequestResponse } from '../../interfaces/request'
import { ProbeAlert } from '../../interfaces/probe'
import { publicIpAddress, publicNetworkInfo } from '../../utils/public-ip'
import { getDowntimeDuration } from '../downtime-counter'
import { getIncidents } from '../incident'

const getLinuxDistro = promisify(getos)

Expand Down Expand Up @@ -144,17 +144,19 @@ Version: ${userAgent}`
}

function getRecoveryMessage(isRecovery: boolean, probeID: string, url: string) {
const incidentDateTime = getContext().incidents.find(
const incidentDateTime = getIncidents().find(
(incident) =>
incident.probeID === probeID && incident.probeRequestURL === url
)?.createdAt
)
if (!isRecovery || !incidentDateTime) {
return ''
}

const incidentDuration = getDowntimeDuration({ probeID, url })
const incidentDuration = formatDistanceToNow(incidentDateTime.createdAt, {
includeSeconds: true,
})
const humanReadableIncidentDateTime = format(
incidentDateTime,
incidentDateTime.createdAt,
'yyyy-MM-dd HH:mm:ss XXX'
)

Expand Down
4 changes: 2 additions & 2 deletions src/components/probe/prober/http/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
import { getAlertID } from '../../../../utils/alert-id'
import { getEventEmitter } from '../../../../utils/events'
import { isSymonModeFrom } from '../../../config'
import { addIncident } from '../../../downtime-counter'
import { addIncident } from '../../../incident'
import { saveProbeRequestLog } from '../../../logger/history'
import { logResponseTime } from '../../../logger/response-time-log'
import { httpRequest } from './request'
Expand Down Expand Up @@ -203,7 +203,7 @@
addIncident({
alert: triggeredAlert,
probeID,
url,
probeRequestURL: url,
})

this.sendNotification({
Expand Down Expand Up @@ -260,7 +260,7 @@
request,
response,
}: ProbeResultMessageParams): string {
// TODO: make this more generic not probe dependent

Check warning on line 263 in src/components/probe/prober/http/index.ts

View workflow job for this annotation

GitHub Actions / lint

Unexpected 'todo' comment: 'TODO: make this more generic not probe...'
if (request?.ping) {
return response?.body as string
}
Expand Down
18 changes: 9 additions & 9 deletions src/components/probe/prober/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ import { expect } from '@oclif/test'

import type { ProberMetadata } from '.'

import { getContext } from '../../../context'
import { createProber } from './factory'
import { getIncidents } from '../../incident'

describe('Prober', () => {
describe('Initial incident state', () => {
Expand Down Expand Up @@ -88,7 +88,7 @@ describe('Prober', () => {
// assert
expect(probeRequestTotal).eq(1)
expect(webhookBody).eq(null)
expect(getContext().incidents.length).eq(0)
expect(getIncidents().length).eq(0)
})

it('should not initialize probe state if last event id is not specify', async () => {
Expand Down Expand Up @@ -125,7 +125,7 @@ describe('Prober', () => {
// assert
expect(probeRequestTotal).eq(1)
expect(webhookBody).eq(null)
expect(getContext().incidents.length).eq(0)
expect(getIncidents().length).eq(0)
})

it('should not initialize probe state if last event recovered at is not null', async () => {
Expand Down Expand Up @@ -163,7 +163,7 @@ describe('Prober', () => {
// assert
expect(probeRequestTotal).eq(1)
expect(webhookBody).eq(null)
expect(getContext().incidents.length).eq(0)
expect(getIncidents().length).eq(0)
})

it('should not initialize probe state if alert is not found', async () => {
Expand Down Expand Up @@ -201,7 +201,7 @@ describe('Prober', () => {
// assert
expect(probeRequestTotal).eq(1)
expect(webhookBody).eq(null)
expect(getContext().incidents.length).eq(0)
expect(getIncidents().length).eq(0)
})

it('should send recovery notification if recovered_at is null and target is healthy', async () => {
Expand Down Expand Up @@ -254,7 +254,7 @@ describe('Prober', () => {
// assert
expect(probeRequestTotal).eq(1)
expect(webhookBody).not.eq(null)
expect(getContext().incidents.length).eq(0)
expect(getIncidents().length).eq(0)
})

it('should not send incident notification if recovered_at is null and target is not healthy', async () => {
Expand Down Expand Up @@ -313,7 +313,7 @@ describe('Prober', () => {
// assert
expect(probeRequestTotal).eq(1)
expect(webhookBody).eq(null)
expect(getContext().incidents.length).eq(1)
expect(getIncidents().length).eq(1)
})

it('should not send recovery notification if recovered_at is not null and target is healthy', async () => {
Expand Down Expand Up @@ -365,7 +365,7 @@ describe('Prober', () => {
// assert
expect(probeRequestTotal).eq(1)
expect(webhookBody).eq(null)
expect(getContext().incidents.length).eq(0)
expect(getIncidents().length).eq(0)
})

it('should send incident notification if recovered_at is not null and target is not healthy', async () => {
Expand Down Expand Up @@ -425,7 +425,7 @@ describe('Prober', () => {
// assert
expect(probeRequestTotal).eq(1)
expect(webhookBody).not.eq(null)
expect(getContext().incidents.length).eq(1)
expect(getIncidents().length).eq(1)
})
})
})
Expand Down
Loading
Loading