From 263dc0b337696838e2bde1615a06757dba646fcd Mon Sep 17 00:00:00 2001 From: antoineludeau <52679050+antoineludeau@users.noreply.github.com> Date: Fri, 9 Aug 2024 11:49:08 +0200 Subject: [PATCH 1/3] Added temp collections when exporting to lower data unavailability time --- .../export-to-exploitation-db-consumer.js | 204 +++++++++++++----- lib/api/consumers/format-to-legacy-helpers.js | 24 +-- 2 files changed, 154 insertions(+), 74 deletions(-) diff --git a/lib/api/consumers/export-to-exploitation-db-consumer.js b/lib/api/consumers/export-to-exploitation-db-consumer.js index c7e32274..a33ab734 100644 --- a/lib/api/consumers/export-to-exploitation-db-consumer.js +++ b/lib/api/consumers/export-to-exploitation-db-consumer.js @@ -2,7 +2,7 @@ import {Transaction} from 'sequelize' import {createFantoirCommune} from '@ban-team/fantoir' import {findCodePostal} from 'codes-postaux/full.js' import mongo from '../../util/mongo.cjs' -import {sequelize, District, CommonToponym, Address} from '../../util/sequelize.js' +import {sequelize, District, CommonToponym} from '../../util/sequelize.js' import {derivePositionProps} from '../../util/geo.cjs' import {createPseudoCodeVoieGenerator} from '../../pseudo-codes-voies.cjs' @@ -72,6 +72,36 @@ const addressPageQuery = tempTableName => ` LIMIT :limit ` +const commonToponymTempTableCountQuery = tempTableName => ` + SELECT + COUNT(*) + FROM + ${tempTableName} +` + +const addressTempTableCountQuery = tempTableName => ` + SELECT + COUNT(*) + FROM + ${tempTableName} +` + +const specificCommonToponymTempTableCountQuery = tempTableName => ` + SELECT + COUNT(*) + FROM + ${tempTableName} + WHERE meta->'bal'->>'isLieuDit' = 'true'; +` + +const addressCertifiedTempTableCountQuery = tempTableName => ` + SELECT + COUNT(*) + FROM + ${tempTableName} + WHERE certified = TRUE; +` + export default async function exportToExploitationDB({data}) { const {districtID} = data console.log(`Exporting districtID ${districtID} to exploitation DB...`) @@ -130,18 
+160,26 @@ export default async function exportToExploitationDB({data}) { return } - // Clean collections - // Delete all data related to the district (legacy and banID) - await deleteAllLegacyDataRelatedToCOG(cog) - - // Generate temporary table names based on districtID + // Setting temporary tables and collections + // Temporary table names const tempCommonToponymTableName = `temp_common_toponym_${cog}` const tempAddressTableName = `temp_address_${cog}` - // Drop temporary tables + // Temporary collections names + const tempCommonToponymCollectionName = `temp_${EXPLOITATION_DB_COLLECTION_NAMES.commonToponym}_${cog}` + const tempAddressCollectionName = `temp_${EXPLOITATION_DB_COLLECTION_NAMES.address}_${cog}` + const tempDistrictCollectionName = `temp_${EXPLOITATION_DB_COLLECTION_NAMES.district}_${cog}` + + // Temporary collections references + const tempCommonToponymCollection = mongo.db.collection(tempCommonToponymCollectionName) + const tempAddressCollection = mongo.db.collection(tempAddressCollectionName) + const tempDistrictCollection = mongo.db.collection(tempDistrictCollectionName) + try { - await sequelize.query(`DROP TABLE IF EXISTS ${tempCommonToponymTableName}`, {transaction}) - await sequelize.query(`DROP TABLE IF EXISTS ${tempAddressTableName}`, {transaction}) + // Drop temporary tables and collections if they exist to be sure to start from a clean state + await deleteTempTables([tempCommonToponymTableName, tempAddressTableName]) + await deleteTempCollections([tempCommonToponymCollection, tempAddressCollection, tempDistrictCollection]) + // Create temporary tables await sequelize.query(createCommonToponymTempTableQuery(tempCommonToponymTableName), { replacements: {districtID}, @@ -156,25 +194,31 @@ export default async function exportToExploitationDB({data}) { // CommonToponym // Count the total number of common toponyms and pages to process - const totalCommonToponymPages = Math.ceil(totalCommonToponymRecords / PAGE_SIZE) + const 
[commonToponymTempTableCountQueryResult] = await sequelize.query(commonToponymTempTableCountQuery(tempCommonToponymTableName), {transaction}) + const totalCommonToponymTempTableRecordsResult = Number(commonToponymTempTableCountQueryResult?.[0]?.count) + const totalCommonToponymPages = Math.ceil(totalCommonToponymTempTableRecordsResult / PAGE_SIZE) const fetchAndExportDataFromCommonToponymPage = async pageNumber => { - const offset = (pageNumber - 1) * PAGE_SIZE - const [pageData] = await sequelize.query(commonToponymPageQuery(tempCommonToponymTableName), { - replacements: { - districtID, - offset, - limit: PAGE_SIZE - }, - transaction, - raw: true, - }) - // Format the data and calculate the fantoir code, tiles and postal code - const pageDataWithExtraDataCalculation = pageData.map(commonToponym => calculateExtraDataForCommonToponym(commonToponym, cog, fantoirFinder, commonToponymIDFantoirCodeMap)) - const formatedPageDataForLegacy = pageDataWithExtraDataCalculation.map(commonToponym => formatCommonToponymDataForLegacy(commonToponym, district, pseudoCodeVoieGenerator, commonToponymLegacyIDCommonToponymIDMap, commonToponymLegacyIDSet)) - - // Insert the data in the collection (legacy and banID) - await mongo.db.collection(EXPLOITATION_DB_COLLECTION_NAMES.commonToponym).insertMany(formatedPageDataForLegacy, {ordered: false}) + try { + const offset = (pageNumber - 1) * PAGE_SIZE + const [pageData] = await sequelize.query(commonToponymPageQuery(tempCommonToponymTableName), { + replacements: { + offset, + limit: PAGE_SIZE + }, + transaction, + raw: true, + }) + // Format the data and calculate the fantoir code, tiles and postal code + const pageDataWithExtraDataCalculation = pageData.map(commonToponym => calculateExtraDataForCommonToponym(commonToponym, cog, fantoirFinder, commonToponymIDFantoirCodeMap)) + const formatedPageDataForLegacy = pageDataWithExtraDataCalculation.map(commonToponym => formatCommonToponymDataForLegacy(commonToponym, district, 
pseudoCodeVoieGenerator, commonToponymLegacyIDCommonToponymIDMap, commonToponymLegacyIDSet)) + + // Insert the data in the temp collection + await tempCommonToponymCollection.insertMany(formatedPageDataForLegacy, {ordered: false}) + } catch (error) { + console.error(`Error exporting common toponym page ${pageNumber}: ${error.message}`) + throw error + } } const commonToponymsExportPromises = [] @@ -186,20 +230,14 @@ export default async function exportToExploitationDB({data}) { // Address // Count the total number of addresses and pages to process - const totalAddressRecords = await Address.count({ - where: { - districtID, - isActive: true - }, - transaction, - }) + const [addressTempTableCountQueryResult] = await sequelize.query(addressTempTableCountQuery(tempAddressTableName), {transaction}) + const totalAddressRecords = Number(addressTempTableCountQueryResult?.[0]?.count) const totalAddressPages = Math.ceil(totalAddressRecords / PAGE_SIZE) const fetchAndExportDataFromAddressPage = async pageNumber => { const offset = (pageNumber - 1) * PAGE_SIZE const [pageData] = await sequelize.query(addressPageQuery(tempAddressTableName), { replacements: { - districtID, offset, limit: PAGE_SIZE }, @@ -211,8 +249,8 @@ export default async function exportToExploitationDB({data}) { const pageDataWithExtraDataCalculation = pageData.map(address => calculateExtraDataForAddress(address, cog, commonToponymIDFantoirCodeMap)) const formatedPageDataForLegacy = pageDataWithExtraDataCalculation.map(address => formatAddressDataForLegacy(address, district, commonToponymLegacyIDCommonToponymIDMap, addressLegacyIDSet)) - // Insert the data in the collection (legacy and banID) - await mongo.db.collection(EXPLOITATION_DB_COLLECTION_NAMES.address).insertMany(formatedPageDataForLegacy, {ordered: false}) + // Insert the data in the temp collection (awaited so the page promise only resolves once the insert is done, and an insert failure aborts the export before the merge) + await tempAddressCollection.insertMany(formatedPageDataForLegacy, {ordered: false}) } const addressesExportPromises = [] @@ -223,24 +261,43 @@ export
default async function exportToExploitationDB({data}) { await Promise.all(addressesExportPromises) // District - // For Legacy collections - const districtFormatedForLegacy = await formatDistrictDataForLegacy(district, totalCommonToponymRecords, totalAddressRecords, transaction) - await mongo.db.collection(EXPLOITATION_DB_COLLECTION_NAMES.district).updateOne({codeCommune: cog}, {$set: districtFormatedForLegacy}, {upsert: true}) + // Count the total number of "lieu-dit" common toponym used for the district legacy format + const [specificCommonToponymTempTableCountQueryResult] = await sequelize.query(specificCommonToponymTempTableCountQuery(tempCommonToponymTableName), {transaction}) + const totalSpecifCommonToponymRecords = Number(specificCommonToponymTempTableCountQueryResult?.[0]?.count) + + // Count the total number of certified address used for the district legacy format + const [addressCertifiedTempTableCountQueryResult] = await sequelize.query(addressCertifiedTempTableCountQuery(tempAddressTableName), {transaction}) + const totalAddressCertifiedRecords = Number(addressCertifiedTempTableCountQueryResult?.[0]?.count) + + // Commit the transaction + await transaction.commit() + + // Format the district data for the legacy format + const districtFormatedForLegacy = await formatDistrictDataForLegacy(district, {totalCommonToponymRecords, totalSpecifCommonToponymRecords, totalAddressRecords, totalAddressCertifiedRecords}) + + // Insert the data in the temp collection + await tempDistrictCollection.insertOne(districtFormatedForLegacy) // Pseudo code voie generator saving data await pseudoCodeVoieGenerator.save() + // Drop the old data + await deleteOldDataFromFinaleCollections(cog) + + // Merge the temporary tables into the final collections + await mergeTempCollectionsIntoFinaleCollections(tempDistrictCollection, tempCommonToponymCollection, tempAddressCollection) + + // Drop temporary collections + await deleteTempCollections([tempDistrictCollection, 
tempCommonToponymCollection, tempAddressCollection]) + // Drop temporary tables - await sequelize.query(`DROP TABLE IF EXISTS ${tempCommonToponymTableName}`, {transaction}) - await sequelize.query(`DROP TABLE IF EXISTS ${tempAddressTableName}`, {transaction}) + await deleteTempTables([tempCommonToponymTableName, tempAddressTableName]) } catch (error) { - await sequelize.query(`DROP TABLE IF EXISTS ${tempCommonToponymTableName}`, {transaction}) - await sequelize.query(`DROP TABLE IF EXISTS ${tempAddressTableName}`, {transaction}) - console.error(`Exporting districtID ${districtID} failed: ${error.message}`) + await deleteTempCollections([tempDistrictCollection, tempCommonToponymCollection, tempAddressCollection]) + await deleteTempTables([tempCommonToponymTableName, tempAddressTableName]) + throw error } - // Commit the transaction - await transaction.commit() console.log(`Exporting districtID ${districtID} done`) } catch (error) { await transaction.rollback() @@ -251,13 +308,55 @@ export default async function exportToExploitationDB({data}) { // Helpers -// Helpers for exploitation DB +// Helpers for the temporary tables and collections +const deleteOldDataFromFinaleCollections = async cog => { + await mongo.db.collection(EXPLOITATION_DB_COLLECTION_NAMES.district).deleteOne({codeCommune: cog}) + await mongo.db.collection(EXPLOITATION_DB_COLLECTION_NAMES.commonToponym).deleteMany({codeCommune: cog}) + await mongo.db.collection(EXPLOITATION_DB_COLLECTION_NAMES.address).deleteMany({codeCommune: cog}) +} + +const mergeTempCollectionsIntoFinaleCollections = async (tempDistrictCollection, tempCommonToponymCollection, tempAddressCollection) => { + const collectionsToMerge = [ + {tempCollection: tempDistrictCollection, finalCollectionName: EXPLOITATION_DB_COLLECTION_NAMES.district}, + {tempCollection: tempCommonToponymCollection, finalCollectionName: EXPLOITATION_DB_COLLECTION_NAMES.commonToponym}, + {tempCollection: tempAddressCollection, finalCollectionName: 
EXPLOITATION_DB_COLLECTION_NAMES.address} + ] + + const mergeCollection = async ({tempCollection, finalCollectionName}) => { + await tempCollection.aggregate([ + {$match: {}}, + { + $merge: { + into: finalCollectionName + } + } + ]).toArray() + } + + const promises = collectionsToMerge.map(collectionToMerge => mergeCollection(collectionToMerge)) + await Promise.all(promises) +} + +const deleteTempCollections = async collectionReferences => { + // Get the list of existing collections + const existingCollections = await mongo.db.listCollections().toArray() + + // Extract the names of the existing collections + const existingCollectionNames = new Set(existingCollections.map(collection => collection.name)) + + const promises = collectionReferences.map(async collectionReference => { + // Check if the collection exists + if (existingCollectionNames.has(collectionReference.collectionName)) { + // Drop the collection if it exists + await collectionReference.drop() + } + }) + await Promise.all(promises) +} -const deleteAllLegacyDataRelatedToCOG = async cog => { - await Promise.all([ - mongo.db.collection(EXPLOITATION_DB_COLLECTION_NAMES.commonToponym).deleteMany({codeCommune: cog}), - mongo.db.collection(EXPLOITATION_DB_COLLECTION_NAMES.address).deleteMany({codeCommune: cog}), - ]) +const deleteTempTables = async tableNames => { + const promises = tableNames.map(tableName => sequelize.query(`DROP TABLE IF EXISTS ${tableName}`)) + await Promise.all(promises) } // Helpers for calculation @@ -361,6 +460,7 @@ const calculateAddressPostalCode = (commonToponymIDFantoirCodeMap, address, cog) } } +// Helpers to calculate the geometry and tiles const calculateCommonToponymGeometryAndTiles = commonToponym => { const {geometry: geometryFromCommonToponym, centroid} = commonToponym let geometryFromCentroid diff --git a/lib/api/consumers/format-to-legacy-helpers.js b/lib/api/consumers/format-to-legacy-helpers.js index 473b8811..1de4a12d 100644 --- 
a/lib/api/consumers/format-to-legacy-helpers.js +++ b/lib/api/consumers/format-to-legacy-helpers.js @@ -1,6 +1,5 @@ import {readFileSync} from 'node:fs' import {createHmac} from 'node:crypto' -import {CommonToponym, Address} from '../../util/sequelize.js' import {convertToLegacyPositionType} from '../helper.js' import {getCommune as getDistrictFromAdminDivision, getRegion, getDepartement as getDepartment} from '../../util/cog.cjs' @@ -14,29 +13,10 @@ const districtsAddressesExtraDataIndex = districtsAddressesExtraData.reduce((acc return acc }, {}) -export const formatDistrictDataForLegacy = async (district, totalCommonToponymRecords, totalAddressRecords, transaction) => { +export const formatDistrictDataForLegacy = async (district, {totalCommonToponymRecords, totalSpecifCommonToponymRecords, totalAddressRecords, totalAddressCertifiedRecords}) => { const {id, meta, labels} = district const {insee: {cog}} = meta - // Count the total number of "lieu-dit" common toponym used for the district legacy format - const totalSpecifCommonToponymRecords = await CommonToponym.count({ - where: { - districtID: id, - meta: { - bal: { - isLieuDit: true - } - }, - }, - transaction, - }) - - // Count the total number of certified addresses used for the district legacy format - const totalAddressCertifiedRecords = await Address.count({ - where: {districtID: id, certified: true}, - transaction, - }) - // District data from administrative division const districtFromAdminDivision = getDistrictFromAdminDivision(cog) const {population, codesPostaux: postalCodes, type} = districtFromAdminDivision @@ -67,7 +47,7 @@ export const formatDistrictDataForLegacy = async (district, totalCommonToponymRe } return { - banId: district?.id, + banId: id, codeCommune: cog, nomCommune: legacyLabelValue, population, From adc9b9dc3f009f0846c22d1ab5e92b39dcad30dd Mon Sep 17 00:00:00 2001 From: antoineludeau <52679050+antoineludeau@users.noreply.github.com> Date: Tue, 10 Sep 2024 14:33:51 +0200 Subject: [PATCH 
2/3] Refactored and cleaned export postgres to mongo algorithm --- .../export-to-exploitation-db-consumer.js | 114 ++++++++++-------- 1 file changed, 64 insertions(+), 50 deletions(-) diff --git a/lib/api/consumers/export-to-exploitation-db-consumer.js b/lib/api/consumers/export-to-exploitation-db-consumer.js index a33ab734..9d1d5e94 100644 --- a/lib/api/consumers/export-to-exploitation-db-consumer.js +++ b/lib/api/consumers/export-to-exploitation-db-consumer.js @@ -54,7 +54,7 @@ const createAddressTempTableQuery = tempTableName => ` WHERE AV."districtID" = :districtID ` -const commonToponymPageQuery = tempTableName => ` +const pageQuery = tempTableName => ` SELECT * FROM @@ -63,23 +63,7 @@ const commonToponymPageQuery = tempTableName => ` LIMIT :limit ` -const addressPageQuery = tempTableName => ` - SELECT - * - FROM - ${tempTableName} - OFFSET :offset - LIMIT :limit -` - -const commonToponymTempTableCountQuery = tempTableName => ` - SELECT - COUNT(*) - FROM - ${tempTableName} -` - -const addressTempTableCountQuery = tempTableName => ` +const countQuery = tempTableName => ` SELECT COUNT(*) FROM @@ -181,34 +165,47 @@ export default async function exportToExploitationDB({data}) { await deleteTempCollections([tempCommonToponymCollection, tempAddressCollection, tempDistrictCollection]) // Create temporary tables - await sequelize.query(createCommonToponymTempTableQuery(tempCommonToponymTableName), { - replacements: {districtID}, - transaction, - }) - console.log(`Temporary table ${tempCommonToponymTableName} created`) - await sequelize.query(createAddressTempTableQuery(tempAddressTableName), { - replacements: {districtID}, - transaction, - }) - console.log(`Temporary table ${tempAddressTableName} created`) - + await sequelize.query( + createCommonToponymTempTableQuery(tempCommonToponymTableName), + { + replacements: { + districtID + }, + transaction, + }) + await sequelize.query( + createAddressTempTableQuery(tempAddressTableName), + { + replacements: { + tempTable:
tempAddressTableName, + districtID + }, + transaction, + }) // CommonToponym // Count the total number of common toponyms and pages to process - const [commonToponymTempTableCountQueryResult] = await sequelize.query(commonToponymTempTableCountQuery(tempCommonToponymTableName), {transaction}) + const commonToponymTempTableCountQueryResult = await sequelize.query( + countQuery(tempCommonToponymTableName), + { + type: sequelize.QueryTypes.SELECT, + transaction, + }) const totalCommonToponymTempTableRecordsResult = Number(commonToponymTempTableCountQueryResult?.[0]?.count) const totalCommonToponymPages = Math.ceil(totalCommonToponymTempTableRecordsResult / PAGE_SIZE) const fetchAndExportDataFromCommonToponymPage = async pageNumber => { try { const offset = (pageNumber - 1) * PAGE_SIZE - const [pageData] = await sequelize.query(commonToponymPageQuery(tempCommonToponymTableName), { - replacements: { - offset, - limit: PAGE_SIZE - }, - transaction, - raw: true, - }) + const pageData = await sequelize.query( + pageQuery(tempCommonToponymTableName), + { + replacements: { + offset, + limit: PAGE_SIZE + }, + type: sequelize.QueryTypes.SELECT, + transaction, + }) // Format the data and calculate the fantoir code, tiles and postal code const pageDataWithExtraDataCalculation = pageData.map(commonToponym => calculateExtraDataForCommonToponym(commonToponym, cog, fantoirFinder, commonToponymIDFantoirCodeMap)) const formatedPageDataForLegacy = pageDataWithExtraDataCalculation.map(commonToponym => formatCommonToponymDataForLegacy(commonToponym, district, pseudoCodeVoieGenerator, commonToponymLegacyIDCommonToponymIDMap, commonToponymLegacyIDSet)) @@ -230,20 +227,27 @@ export default async function exportToExploitationDB({data}) { // Address // Count the total number of addresses and pages to process - const [addressTempTableCountQueryResult] = await sequelize.query(addressTempTableCountQuery(tempAddressTableName), {transaction}) + const addressTempTableCountQueryResult = await 
sequelize.query( + countQuery(tempAddressTableName), + { + type: sequelize.QueryTypes.SELECT, + transaction, + }) const totalAddressRecords = Number(addressTempTableCountQueryResult?.[0]?.count) const totalAddressPages = Math.ceil(totalAddressRecords / PAGE_SIZE) const fetchAndExportDataFromAddressPage = async pageNumber => { const offset = (pageNumber - 1) * PAGE_SIZE - const [pageData] = await sequelize.query(addressPageQuery(tempAddressTableName), { - replacements: { - offset, - limit: PAGE_SIZE - }, - transaction, - raw: true, - }) + const pageData = await sequelize.query( + pageQuery(tempAddressTableName), + { + replacements: { + offset, + limit: PAGE_SIZE + }, + type: sequelize.QueryTypes.SELECT, + transaction, + }) // Format the data and calculate the fantoir code, tiles and postal code const pageDataWithExtraDataCalculation = pageData.map(address => calculateExtraDataForAddress(address, cog, commonToponymIDFantoirCodeMap)) @@ -262,14 +266,24 @@ export default async function exportToExploitationDB({data}) { // District // Count the total number of "lieu-dit" common toponym used for the district legacy format - const [specificCommonToponymTempTableCountQueryResult] = await sequelize.query(specificCommonToponymTempTableCountQuery(tempCommonToponymTableName), {transaction}) + const specificCommonToponymTempTableCountQueryResult = await sequelize.query( + specificCommonToponymTempTableCountQuery(tempCommonToponymTableName), + { + type: sequelize.QueryTypes.SELECT, + transaction, + }) const totalSpecifCommonToponymRecords = Number(specificCommonToponymTempTableCountQueryResult?.[0]?.count) // Count the total number of certified address used for the district legacy format - const [addressCertifiedTempTableCountQueryResult] = await sequelize.query(addressCertifiedTempTableCountQuery(tempAddressTableName), {transaction}) + const addressCertifiedTempTableCountQueryResult = await sequelize.query( + addressCertifiedTempTableCountQuery(tempAddressTableName), + { + type: 
sequelize.QueryTypes.SELECT, + transaction, + }) const totalAddressCertifiedRecords = Number(addressCertifiedTempTableCountQueryResult?.[0]?.count) - // Commit the transaction + // Commit the transaction once the temporary tables are created await transaction.commit() // Format the district data for the legacy format From 787ec7ed15fe09e1898b88e40d55b2021a6e1242 Mon Sep 17 00:00:00 2001 From: antoineludeau <52679050+antoineludeau@users.noreply.github.com> Date: Tue, 10 Sep 2024 15:03:23 +0200 Subject: [PATCH 3/3] Added missing env variables in docker compose --- docker-compose.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index 21a5791c..9a9c81a3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -75,6 +75,8 @@ services: - FORCE_DOWNLOAD_CONTOUR= - FORCE_DOWNLOAD_DATASETS= - IS_GENERATE_BANID_ON_ASSEMBLY=${IS_GENERATE_BANID_ON_ASSEMBLY} + - CP_PATH=${CP_PATH} + - DATANOVA_PATH=${DATANOVA_PATH} ports: - "${PORT:-5000}:5000" volumes: