Skip to content

Commit

Permalink
Merge pull request #412 from BaseAdresseNationale/antoineludeau/fix-d…
Browse files Browse the repository at this point in the history
…uplicate-legacy-idvoie-during-export-from-postgres-to-mongo

Fixed legacy idVoie duplicates when exporting data from postgres to mongo
  • Loading branch information
antoineludeau authored May 15, 2024
2 parents e0f8c0e + 5fc808a commit 4e1ad51
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 14 deletions.
11 changes: 7 additions & 4 deletions lib/api/consumers/export-to-exploitation-db-consumer.js
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,12 @@ export default async function exportToExploitationDB({data}) {
const commonToponymIDFantoirCodeMap = new Map()

// Map to store the legacy common toponym ID for each common toponym ID, to then be able to associate it with the legacy addresses
const commonToponymIDlegacyCommonToponymIDMap = new Map()
const commonToponymLegacyIDCommonToponymIDMap = new Map()

// Set to store the legacy address ID to not have duplicates
// Set to store the legacy common toponym ID to avoid duplicates
const commonToponymLegacyIDSet = new Set()

// Set to store the legacy address ID to avoid duplicates
const addressLegacyIDSet = new Set()

// Clean collections
Expand Down Expand Up @@ -155,7 +158,7 @@ export default async function exportToExploitationDB({data}) {
})
// Format the data and calculate the fantoir code, tiles and postal code
const pageDataWithExtraDataCalculation = pageData.map(commonToponym => calculateExtraDataForCommonToponym(commonToponym, cog, fantoirFinder, commonToponymIDFantoirCodeMap))
const formatedPageDataForLegacy = pageDataWithExtraDataCalculation.map(commonToponym => formatCommonToponymDataForLegacy(commonToponym, district, pseudoCodeVoieGenerator, commonToponymIDlegacyCommonToponymIDMap))
const formatedPageDataForLegacy = pageDataWithExtraDataCalculation.map(commonToponym => formatCommonToponymDataForLegacy(commonToponym, district, pseudoCodeVoieGenerator, commonToponymLegacyIDCommonToponymIDMap, commonToponymLegacyIDSet))

// Insert the data in the collection (legacy and banID)
await mongo.db.collection(EXPLOITATION_DB_COLLECTION_NAMES.commonToponym).insertMany(formatedPageDataForLegacy, {ordered: false})
Expand Down Expand Up @@ -193,7 +196,7 @@ export default async function exportToExploitationDB({data}) {

// Format the data and calculate the fantoir code, tiles and postal code
const pageDataWithExtraDataCalculation = pageData.map(address => calculateExtraDataForAddress(address, cog, commonToponymIDFantoirCodeMap))
const formatedPageDataForLegacy = pageDataWithExtraDataCalculation.map(address => formatAddressDataForLegacy(address, district, commonToponymIDlegacyCommonToponymIDMap, addressLegacyIDSet))
const formatedPageDataForLegacy = pageDataWithExtraDataCalculation.map(address => formatAddressDataForLegacy(address, district, commonToponymLegacyIDCommonToponymIDMap, addressLegacyIDSet))

// Insert the data in the collection (legacy and banID)
await mongo.db.collection(EXPLOITATION_DB_COLLECTION_NAMES.address).insertMany(formatedPageDataForLegacy, {ordered: false})
Expand Down
31 changes: 21 additions & 10 deletions lib/api/consumers/format-to-legacy-helpers.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import {readFileSync} from 'node:fs'
import {createHmac} from 'node:crypto'
import {CommonToponym, Address} from '../../util/sequelize.js'
import {convertToLegacyPositionType} from '../helper.js'
import {getCommune as getDistrictFromAdminDivision, getRegion, getDepartement as getDepartment} from '../../util/cog.cjs'
Expand Down Expand Up @@ -88,7 +89,7 @@ export const formatDistrictDataForLegacy = async (district, totalCommonToponymRe
}
}

export const formatCommonToponymDataForLegacy = (commonToponym, district, pseudoCodeVoieGenerator, commonToponymIDlegacyCommonToponymIDMap) => {
export const formatCommonToponymDataForLegacy = (commonToponym, district, pseudoCodeVoieGenerator, commonToponymLegacyIDCommonToponymIDMap, commonToponymLegacyIDSet) => {
const {labels: districtLabels, meta: {insee: {cog}}} = district
const {id, districtID, geometry, labels, meta, updateDate, addressCount, certifiedAddressCount, bbox, addressBbox} = commonToponym

Expand All @@ -105,12 +106,22 @@ export const formatCommonToponymDataForLegacy = (commonToponym, district, pseudo
// Ids
const codeAncienneCommune = meta?.bal?.codeAncienneCommune
const legacyCommonToponymFantoirId = meta?.dgfip?.fantoir ? `${cog}_${meta?.dgfip?.fantoir}` : null
const legacyCommonToponymId = meta?.dgfip?.fantoir
? `${cog}_${meta?.dgfip?.fantoir}`
: `${cog}_${pseudoCodeVoieGenerator.getCode(legacyLabelValue, codeAncienneCommune)}`.toLowerCase()

let legacyCommonToponymId = legacyCommonToponymFantoirId
// If the legacy common toponym id is already used or not defined, we calculate a pseudo code
if (!legacyCommonToponymId || commonToponymLegacyIDSet.has(legacyCommonToponymId)) {
legacyCommonToponymId = `${cog}_${pseudoCodeVoieGenerator.getCode(legacyLabelValue, codeAncienneCommune)}`.toLowerCase()
// If the pseudo code is already used, we generate a new one with a hash from the common toponym id
if (commonToponymLegacyIDSet.has(legacyCommonToponymId)) {
legacyCommonToponymId = `${cog}_${createHmac('sha256').update(id).digest('hex').slice(0, 5)}`
}
}

// Store the legacy common toponym id for each common toponym to then be able to set it on legacy addresses
commonToponymIDlegacyCommonToponymIDMap.set(id, legacyCommonToponymId)
commonToponymLegacyIDCommonToponymIDMap.set(id, legacyCommonToponymId)

// Store all the legacy common toponym id
commonToponymLegacyIDSet.add(legacyCommonToponymId)

// Geographic data
const legacyPosition = {
Expand Down Expand Up @@ -179,7 +190,7 @@ export const formatCommonToponymDataForLegacy = (commonToponym, district, pseudo
}
}

export const formatAddressDataForLegacy = (address, district, commonToponymIDlegacyCommonToponymIDMap, addressLegacyIDSet) => {
export const formatAddressDataForLegacy = (address, district, commonToponymLegacyIDCommonToponymIDMap, addressLegacyIDSet) => {
const {meta: {insee: {cog}}} = district
const {id, mainCommonToponymID, secondaryCommonToponymIDs, districtID, number, suffix, positions, labels, meta, updateDate, certified, bbox} = address

Expand All @@ -200,9 +211,9 @@ export const formatAddressDataForLegacy = (address, district, commonToponymIDleg
const addressBbox = formatBboxForLegacy(bbox)

// Ids
const legacyCommonToponymId = commonToponymIDlegacyCommonToponymIDMap.get(mainCommonToponymID)
const legacyCommonToponymId = commonToponymLegacyIDCommonToponymIDMap.get(mainCommonToponymID)
const legacyInteropKey = `${legacyCommonToponymId}_${String(number).padStart(5, '0')}${suffix ? `_${suffix}` : ''}`.toLowerCase()
const legacyID = getLegacyId(addressLegacyIDSet, legacyInteropKey)
const legacyID = getAddressLegacyId(addressLegacyIDSet, legacyInteropKey)
addressLegacyIDSet.add(legacyID)
const banIdSecondaryCommonToponyms = secondaryCommonToponymIDs && secondaryCommonToponymIDs.length > 0 ? secondaryCommonToponymIDs : null
const legacySuffix = suffix ? suffix : null
Expand Down Expand Up @@ -314,9 +325,9 @@ const formatLegacyLonLat = position => {
return [lon, lat]
}

/**
 * Returns a legacy address ID that is not yet present in `addressLegacyIDSet`.
 *
 * When `legacyInteropKey` is free it is returned unchanged (as a string).
 * Otherwise a numeric disambiguator is appended — `<key>__0`, `<key>__1`, … —
 * and the first candidate that is not in the set is returned.
 *
 * The set is only read here; the caller is responsible for adding the
 * returned ID to it.
 *
 * @param {Set<string>} addressLegacyIDSet - Legacy address IDs already assigned.
 * @param {string} legacyInteropKey - Preferred legacy ID (interop key).
 * @param {number} [suffix=0] - First disambiguator value to try on collision.
 * @returns {string} A legacy ID absent from `addressLegacyIDSet`.
 */
const getAddressLegacyId = (addressLegacyIDSet, legacyInteropKey, suffix = 0) => {
  if (!addressLegacyIDSet.has(legacyInteropKey)) {
    return `${legacyInteropKey}`
  }

  // Always derive candidates from the original key so that repeated collisions
  // produce `key__0`, `key__1`, … rather than nesting (`key__0__0__0`), which is
  // what the previous recursive form did because `suffix++` passed the
  // un-incremented value down and the suffixed key became the new base.
  let counter = suffix
  let candidate = `${legacyInteropKey}__${counter}`
  while (addressLegacyIDSet.has(candidate)) {
    counter += 1
    candidate = `${legacyInteropKey}__${counter}`
  }

  return candidate
}
Expand Down

0 comments on commit 4e1ad51

Please sign in to comment.