Skip to content

Commit

Permalink
fix: clean old data (#466)
Browse files Browse the repository at this point in the history
  • Loading branch information
arnaudambro authored Mar 11, 2024
1 parent 204fcc3 commit f21ef15
Show file tree
Hide file tree
Showing 5 changed files with 183 additions and 44 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/*
Warnings:
- You are about to drop the `AlertPollutionAtmospheric` table. If the table is not empty, all the data it contains will be lost.
*/
-- DropForeignKey
ALTER TABLE "AlertPollutionAtmospheric" DROP CONSTRAINT "AlertPollutionAtmospheric_municipality_insee_code_fkey";

-- DropTable
DROP TABLE "AlertPollutionAtmospheric";
65 changes: 22 additions & 43 deletions api-node/prisma/schema.prisma
Original file line number Diff line number Diff line change
Expand Up @@ -272,26 +272,6 @@ enum PollutionCodeEnum {
BENZENE // Benzène - code 60
}

model AlertPollutionAtmospheric {
id String @id @default(uuid())
// common fields for all indicators
municipality_insee_code String
municipality Municipality @relation(fields: [municipality_insee_code], references: [COM])
validity_start DateTime // the date is the first date of the validity period, included
validity_end DateTime // the date is the last date of the validity period, included
diffusion_date DateTime
data_availability DataAvailabilityEnum @default(AVAILABLE)
alert_status AlertStatusEnum @default(NOT_ALERT_THRESHOLD)
created_at DateTime @default(now())
updated_at DateTime @default(now()) @updatedAt
// specific fields for this indicator
state PollutionStateEnum
code PollutionCodeEnum
comment_short String
comment_long String
}

model CronJob {
id String @id @default(uuid())
unique_key String @unique
Expand All @@ -302,29 +282,28 @@ model CronJob {
}

model Municipality {
COM String @id // Code commune
TYPECOM String
REG String // Code région
DEP String // Code département
CTCD String? // Code de la collectivité territoriale ayant les compétences départementales
ARR String? // Code arrondissement
CAN String? // Code canton. Pour les communes « multi-cantonales », code décliné de 99 à 90 (pseudo-canton) ou de 89 à 80 (communes nouvelles)
EPCI String? // Code EPCI
LIBEPCI String? // Nom de l'EPCI ou métropole
TNCC String? // Type de nom en clair (majuscules) (https://www.insee.fr/fr/information/2114773)
NCC String? // Nom en clair (majuscules)
NCCENR String? // Nom en clair (typographie riche)
LIBELLE String? // Nom en clair (typographie riche) avec article
COMPARENT String? // Code de la commune parente pour les arrondissements municipaux et les communes associées ou déléguées.
bathing_water_sites Int @default(0)
created_at DateTime @default(now())
updated_at DateTime @default(now()) @updatedAt
IndiceUv IndiceUv[]
PollenAllergyRisk PollenAllergyRisk[]
WeatherAlert WeatherAlert[]
IndiceAtmospheric IndiceAtmospheric[]
AlertPollutionAtmospheric AlertPollutionAtmospheric[]
BathingWater BathingWater[]
COM String @id // Code commune
TYPECOM String
REG String // Code région
DEP String // Code département
CTCD String? // Code de la collectivité territoriale ayant les compétences départementales
ARR String? // Code arrondissement
CAN String? // Code canton. Pour les communes « multi-cantonales », code décliné de 99 à 90 (pseudo-canton) ou de 89 à 80 (communes nouvelles)
EPCI String? // Code EPCI
LIBEPCI String? // Nom de l'EPCI ou métropole
TNCC String? // Type de nom en clair (majuscules) (https://www.insee.fr/fr/information/2114773)
NCC String? // Nom en clair (majuscules)
NCCENR String? // Nom en clair (typographie riche)
LIBELLE String? // Nom en clair (typographie riche) avec article
COMPARENT String? // Code de la commune parente pour les arrondissements municipaux et les communes associées ou déléguées.
bathing_water_sites Int @default(0)
created_at DateTime @default(now())
updated_at DateTime @default(now()) @updatedAt
IndiceUv IndiceUv[]
PollenAllergyRisk PollenAllergyRisk[]
WeatherAlert WeatherAlert[]
IndiceAtmospheric IndiceAtmospheric[]
BathingWater BathingWater[]
}

enum SeasonEnum {
Expand Down
129 changes: 129 additions & 0 deletions api-node/src/cronjobs/cleaning.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import { getAtmoIndicator } from '~/aggregators/indice_atmo.ts';

Check failure on line 1 in api-node/src/cronjobs/cleaning.ts

View workflow job for this annotation

GitHub Actions / lint

'getAtmoIndicator' is defined but never used
import { getPollensIndicator } from '~/aggregators/pollens.ts';

Check failure on line 2 in api-node/src/cronjobs/cleaning.ts

View workflow job for this annotation

GitHub Actions / lint

'getPollensIndicator' is defined but never used
import { getIndiceUVIndicator } from '~/aggregators/indice_uv.ts';

Check failure on line 3 in api-node/src/cronjobs/cleaning.ts

View workflow job for this annotation

GitHub Actions / lint

'getIndiceUVIndicator' is defined but never used
import { setupCronJob } from './utils';
import { capture } from '~/third-parties/sentry';
import { getWeatherAlert } from '~/aggregators/weather_alert';

Check failure on line 6 in api-node/src/cronjobs/cleaning.ts

View workflow job for this annotation

GitHub Actions / lint

'getWeatherAlert' is defined but never used
import { getBathingWaterIndicator } from '~/aggregators/bathing_water';

Check failure on line 7 in api-node/src/cronjobs/cleaning.ts

View workflow job for this annotation

GitHub Actions / lint

'getBathingWaterIndicator' is defined but never used
import { groupUsersByMunicipality } from '~/utils/municipalities';
import prisma from '~/prisma';
import dayjs from 'dayjs';

/*
*
*
Initialization of the cron jobs
We call them one after the other,
in order to avoid to launch them all at the same time
and have logs that are mixed and not readable.
Test it: run `npm run dev-cronjobs` and check the logs
*/

async function cleanIndicatorsData() {
try {
const municipalitiesByUser = await groupUsersByMunicipality();
const inseeCodes = municipalitiesByUser.map(
(row) => row.municipality_insee_code,
);
await prisma.bathingWater
.deleteMany({
where: {
municipality_insee_code: {
notIn: inseeCodes,
},
validity_end: {
lt: dayjs().add(-1, 'weeks').toDate(),
},
},
})
.then(() => {
console.log('Bathing water data cleaned up');
})
.catch(capture);
await prisma.indiceAtmospheric
.deleteMany({
where: {
municipality_insee_code: {
notIn: inseeCodes,
},
validity_end: {
lt: dayjs().add(-1, 'weeks').toDate(),
},
},
})
.then(() => {
console.log('Atmospheric data cleaned up');
})
.catch(capture);
await prisma.indiceUv
.deleteMany({
where: {
municipality_insee_code: {
notIn: inseeCodes,
},
validity_end: {
lt: dayjs().add(-1, 'weeks').toDate(),
},
},
})
.then(() => {
console.log('UV data cleaned up');
})
.catch(capture);
await prisma.weatherAlert
.deleteMany({
where: {
municipality_insee_code: {
notIn: inseeCodes,
},
validity_end: {
lt: dayjs().add(-1, 'weeks').toDate(),
},
},
})
.then(() => {
console.log('Weather alert data cleaned up');
})
.catch(capture);
await prisma.pollenAllergyRisk
.deleteMany({
where: {
municipality_insee_code: {
notIn: inseeCodes,
},
validity_end: {
lt: dayjs().add(-1, 'weeks').toDate(),
},
},
})
.then(() => {
console.log('Pollen data cleaned up');
})
.catch(capture);
} catch (error: any) {
capture(error, { level: 'error' });
}
}

export async function initIndicatorsCleaning() {
await Promise.resolve()
.then(() => {
console.log('Inside cleaning cronjobs');
})
.then(
async () =>
await setupCronJob({
name: 'Clean indicators data',
cronTime: '3 3 * * *', // every day at 3:03am
job: cleanIndicatorsData,
runOnInit: false,
}),
)
.then(() => {
console.log('All indicators data is cleaned up');
})
.catch(capture);
}
2 changes: 2 additions & 0 deletions api-node/src/cronjobs/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { initRecommandations } from './recommandations';
import { initAggregators } from './aggregators';
import { initNotifications } from './notifications';
import { ENVIRONMENT, SENTRY_KEY, VERSION } from '~/config';
import { initIndicatorsCleaning } from './cleaning';

const sentryEnabled = process.env.NODE_ENV !== 'development';

Expand All @@ -28,6 +29,7 @@ if (sentryEnabled) {
}

Promise.resolve()
.then(initIndicatorsCleaning) //
.then(initMunicipalities) //
.then(initRecommandations) //
.then(initAggregators) //
Expand Down
20 changes: 19 additions & 1 deletion api-node/src/utils/municipalities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ function logStep(step: string) {
);
now = Date.now();
}
export async function fillOrUpdateMunicipalitiesInDB() {

async function fillOrUpdateMunicipalitiesInDB() {
/*
Steps:
1. grab the municipalities list (around 37500 in 2023)
Expand Down Expand Up @@ -217,3 +218,20 @@ export async function fillOrUpdateMunicipalitiesInDB() {
}
logStep('Step 5: saved that in the database');
}

async function groupUsersByMunicipality() {
const result: Array<{
municipality_insee_code: Municipality['COM'];
user_count: number;
}> = await prisma.$queryRaw`
SELECT "municipality_insee_code", COUNT(*) AS user_count
FROM "User"
WHERE "municipality_insee_code" IS NOT NULL
GROUP BY "municipality_insee_code"
ORDER BY user_count DESC;
`;

return result;
}

export { fillOrUpdateMunicipalitiesInDB, groupUsersByMunicipality };

0 comments on commit f21ef15

Please sign in to comment.