Skip to content

Commit

Permalink
Added data calculs during exports from postgres to mongo
Browse files Browse the repository at this point in the history
  • Loading branch information
antoineludeau committed Sep 28, 2023
1 parent cbd8dd8 commit 001d625
Showing 1 changed file with 179 additions and 27 deletions.
206 changes: 179 additions & 27 deletions lib/api/consumers/export-to-exploitation-db-consumer.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,58 @@
import {Transaction} from 'sequelize'
import {sequelize, District, CommonToponym, Address} from '../../util/sequelize.js'
import {createFantoirCommune} from '@ban-team/fantoir'
import {findCodePostal} from 'codes-postaux/full.js'
import mongo from '../../util/mongo.cjs'
import {sequelize, District, CommonToponym, Address} from '../../util/sequelize.js'
import {derivePositionProps} from '../../util/geo.cjs'

// Settings

// Number of records read from the source database and inserted into the target collection per page
const PAGE_SIZE = 100

// Path to the fantoir sqlite database; the FANTOIR_PATH environment variable is used when it is set and non-empty
const FANTOIR_PATH = process.env.FANTOIR_PATH || 'data/fantoir.sqlite'

// Min and max zoom levels handed to derivePositionProps when computing tiles,
// one pair for common toponyms and one pair for addresses
const COMMON_TOPONYM_TILES_MIN_ZOOM = 10
const COMMON_TOPONYM_TILES_MAX_ZOOM = 14
const ADDRESS_TILES_MIN_ZOOM = 12
const ADDRESS_TILES_MAX_ZOOM = 14

// Names of the target collections the export writes to (and clears beforehand)
const DISTRICT_COLLECTION = 'districts'
const COMMON_TOPONYM_COLLECTION = 'common_toponyms'
const ADDRESS_COLLECTION = 'addresses'

// The priority of each position type
// Position types listed from the most preferred to the least preferred one.
// Each type is mapped to its 1-based rank in this list: the lower the rank, the higher the priority.
const POSITION_TYPES_PRIORITY = Object.fromEntries(
  [
    'entrance',
    'building',
    'staircase identifier',
    'unit identifier',
    'utility service',
    'postal delivery',
    'parcel',
    'segment',
    'other'
  ].map((positionType, rank) => [positionType, rank + 1])
)

// NOTE(review): these camelCase names hold the same values as DISTRICT_COLLECTION,
// COMMON_TOPONYM_COLLECTION, ADDRESS_COLLECTION and PAGE_SIZE declared above.
// They look like the pre-change side of the diff — confirm which set is meant to remain.
const DistrictCollection = 'districts'
const CommonToponymCollection = 'common_toponyms'
const AddressCollection = 'addresses'

const pageSize = 100
// Raw SQL used to read one page of common toponyms for a district.
// Besides the common toponym columns, each row carries a "centroid" column: the centroid of the
// collected geometries taken from positions[1] of every address whose "mainCommonToponymID"
// points to the toponym (LEFT JOIN, so toponyms without addresses are still returned;
// presumably with a NULL centroid — TODO confirm).
// Expected replacements: :districtID, :offset and :limit. Rows are ordered by id so that
// OFFSET/LIMIT paging is stable.
const commonToponymPageQuery = `
SELECT
CT.id, CT."districtID", CT.labels, CT.geometry, CT."updateDate", CT.meta, CT."createdAt", CT."updatedAt",
ST_Centroid(ST_Collect(ST_GeomFromGeoJSON((A.positions[1])->'geometry'))) AS centroid
FROM
"CommonToponyms" AS CT
LEFT JOIN
"Addresses" AS A
ON
CT.id = A."mainCommonToponymID"
WHERE CT."districtID" = :districtID
GROUP BY CT.id
ORDER BY CT.id ASC
OFFSET :offset
LIMIT :limit
`

export default async function exportToExploitationDB({data}) {
const {districtID} = data
Expand Down Expand Up @@ -34,51 +80,157 @@ export default async function exportToExploitationDB({data}) {
await deleteAllDataRelatedToDistrict(districtID)

// Insert the district
await mongo.db.collection(DistrictCollection).insertOne(district)
await mongo.db.collection(DISTRICT_COLLECTION).insertOne(district)

// CommonToponym
// Prepare fantoir finder from cog and fantoir sqlite database
const {meta: {insee: {cog}}} = district
const fantoirFinder = await createFantoirCommune(cog, {FANTOIR_PATH})

// Map to store the fantoir code for each common toponym
const commonToponymIDFantoirCodeMap = new Map()

// Count the total number of common toponyms and pages to process
const totalCommonToponymRecords = await CommonToponym.count({
where: {districtID},
transaction,
})
const totalCommonToponymPages = Math.ceil(totalCommonToponymRecords / pageSize)

const fetchAndExportDataFromPage = async (model, collection, pageNumber) => {
const offset = (pageNumber - 1) * pageSize
const pageData = await model.findAll({
where: {districtID},
order: [['id', 'ASC']],
offset,
limit: pageSize,
transaction,
raw: true,
})
// Insert the common toponyms from the related page
await mongo.db.collection(collection).insertMany(pageData, {ordered: false})
const totalCommonToponymPages = Math.ceil(totalCommonToponymRecords / PAGE_SIZE)

// Helpers
// Helpers to calculate the fantoir code
// Look up the fantoir code of a common toponym from its label and remember it by toponym id.
// - commonToponym: record with an `id` and, usually, a `labels` array of {isoCode, value}
// - returns the fantoir code, or undefined when there is no usable label or no match
// The result (even when undefined) is stored in commonToponymIDFantoirCodeMap so that the
// postal code helpers can reuse it later.
const calculateCommonToponymFantoirCode = commonToponym => {
  // A record without labels must not crash the export: treat it as "no label"
  const labels = commonToponym?.labels ?? []
  // Find the label in 'fra' if possible, otherwise take the first one
  const labelValue = labels.find(({isoCode}) => isoCode === 'fra')?.value || labels[0]?.value
  // Only query the fantoir finder when there is actually a label to search for
  const fantoirCode = labelValue ? fantoirFinder.findVoie(labelValue, cog)?.codeFantoir : undefined
  commonToponymIDFantoirCodeMap.set(commonToponym.id, fantoirCode)
  return fantoirCode
}

// Helpers to calculate the tiles
// Derive {tiles, x, y} for a common toponym from its `centroid` geometry.
// Returns an empty object when the record has no centroid.
const calculateCommonToponymTiles = commonToponym => {
  const geometry = commonToponym.centroid
  if (geometry) {
    // The `crs` member is stripped before the geometry is handed to derivePositionProps
    const {crs: _discardedCrs, ...geometryWithoutCrs} = geometry
    const derived = derivePositionProps(
      geometryWithoutCrs,
      COMMON_TOPONYM_TILES_MIN_ZOOM,
      COMMON_TOPONYM_TILES_MAX_ZOOM
    )
    return {tiles: derived.tiles, x: derived.x, y: derived.y}
  }

  return {}
}

// Derive {tiles, x, y} for an address from its best position.
// The best position is the one whose type has the lowest rank in POSITION_TYPES_PRIORITY;
// on a tie the earliest position wins. Types that are not listed rank last, so an unknown
// type in first place can no longer shadow a known, better one.
// Returns an empty object when the address has no positions or the chosen one has no geometry.
const calculateAddressTiles = address => {
  const {positions} = address
  if (!Array.isArray(positions) || positions.length === 0) {
    return {}
  }

  const rankOf = position => POSITION_TYPES_PRIORITY[position?.type] ?? Number.POSITIVE_INFINITY
  // Find the position with the highest priority
  const positionPrioritized = positions.reduce(
    (best, candidate) => (rankOf(candidate) < rankOf(best) ? candidate : best),
    positions[0]
  )
  if (!positionPrioritized?.geometry) {
    return {}
  }

  // Calculate the tiles for the position with the highest priority
  const {tiles, x, y} = derivePositionProps(positionPrioritized.geometry, ADDRESS_TILES_MIN_ZOOM, ADDRESS_TILES_MAX_ZOOM)
  return {tiles, x, y}
}

// Helpers to calculate the postal code
// Resolve the postal code of a common toponym.
// Uses the fantoir code previously stored for this toponym id in commonToponymIDFantoirCodeMap
// (undefined when none was stored) together with the district cog.
// Returns the postal code, or undefined when the lookup yields nothing.
const calculateCommonToponymPostalCode = commonToponym => {
  const fantoirCode = commonToponymIDFantoirCodeMap.get(commonToponym.id)
  // A lookup without result must not abort the export, hence no destructuring of the raw result
  return findCodePostal(cog, fantoirCode)?.codePostal
}

// Resolve the postal code of an address.
// Uses the fantoir code stored for the address' main common toponym in
// commonToponymIDFantoirCodeMap, plus the address number and suffix, together with the district cog.
// Returns the postal code, or undefined when the lookup yields nothing.
const calculateAddressPostalCode = address => {
  const {mainCommonToponymID, number, suffix} = address
  const fantoirCode = commonToponymIDFantoirCodeMap.get(mainCommonToponymID)
  // A lookup without result must not abort the export, hence no destructuring of the raw result
  return findCodePostal(cog, fantoirCode, number, suffix)?.codePostal
}

// Enrich one page of records with computed metadata before it is written to the target collection.
// - pageData: array of raw records
// - type: 'commonToponym' or 'adresse'
// - returns a new array; every record keeps its existing `meta` and gains, when available,
//   meta.dgfip.fantoir (common toponyms only), meta.geography.{tiles, x, y} and meta.laposte.codePostal
// - throws an Error for any other type instead of silently returning undefined
const formatPageData = (pageData, type) => {
  // x and y are coordinates: 0 is a valid value, only null/undefined mean "absent"
  const isPresent = value => value !== undefined && value !== null

  // Merge the computed values into a copy of the existing meta, section by section
  const mergeMeta = (meta, {fantoirCode, tiles, x, y, postalCode}) => ({
    ...meta,
    ...(fantoirCode ? {dgfip: {...meta?.dgfip, fantoir: fantoirCode}} : {}),
    ...(tiles && isPresent(x) && isPresent(y) ? {geography: {...meta?.geography, tiles, x, y}} : {}),
    ...(postalCode ? {laposte: {...meta?.laposte, codePostal: postalCode}} : {})
  })

  if (type === 'commonToponym') {
    return pageData.map(commonToponym => {
      // The fantoir code must be calculated first: the postal code helper reads it back
      const fantoirCode = calculateCommonToponymFantoirCode(commonToponym)
      const {tiles, x, y} = calculateCommonToponymTiles(commonToponym)
      const postalCode = calculateCommonToponymPostalCode(commonToponym)
      // Remove the centroid data from the common toponym
      const {centroid, ...commonToponymCleaned} = commonToponym
      return {
        ...commonToponymCleaned,
        meta: mergeMeta(commonToponym.meta, {fantoirCode, tiles, x, y, postalCode})
      }
    })
  }

  if (type === 'adresse') {
    return pageData.map(address => {
      const {tiles, x, y} = calculateAddressTiles(address)
      const postalCode = calculateAddressPostalCode(address)
      return {
        ...address,
        // Keep the existing meta, exactly like for common toponyms
        meta: mergeMeta(address.meta, {tiles, x, y, postalCode})
      }
    })
  }

  throw new Error(`Unsupported export type: ${type}`)
}

// Read one page of records from the source database, enrich it and insert it into `collection`.
// - type: 'commonToponym' (read through commonToponymPageQuery) or 'adresse' (read through `model`)
// - model: model queried with findAll for the 'adresse' type; unused for 'commonToponym'
// - collection: name of the target collection
// - pageNumber: 1-based page index; the offset is (pageNumber - 1) * PAGE_SIZE
// Throws an Error for any other type. An empty page is skipped rather than sent to insertMany.
const fetchAndExportDataFromPage = async (type, model, collection, pageNumber) => {
  const offset = (pageNumber - 1) * PAGE_SIZE
  let pageData

  // Export the data from the page
  if (type === 'commonToponym') {
    // sequelize.query resolves to [rows, metadata] here: only the rows are needed
    const [rows] = await sequelize.query(commonToponymPageQuery, {
      replacements: {districtID, offset, limit: PAGE_SIZE},
      transaction,
      raw: true,
    })
    pageData = rows
  } else if (type === 'adresse') {
    pageData = await model.findAll({
      where: {districtID},
      order: [['id', 'ASC']],
      offset,
      limit: PAGE_SIZE,
      transaction,
      raw: true,
    })
  } else {
    throw new Error(`Unsupported export type: ${type}`)
  }

  // Nothing to write for an empty page; an empty batch would be rejected by the insert
  if (!pageData || pageData.length === 0) {
    return
  }

  // Format the data and calculate the fantoir code, tiles and postal code
  const formattedPageData = formatPageData(pageData, type)

  // Insert the data in the collection
  await mongo.db.collection(collection).insertMany(formattedPageData, {ordered: false})
}

const commonToponymsExportPromises = []

for (let pageNumber = 1; pageNumber <= totalCommonToponymPages; pageNumber++) {
commonToponymsExportPromises.push(
fetchAndExportDataFromPage(CommonToponym, CommonToponymCollection, pageNumber)
fetchAndExportDataFromPage('commonToponym', CommonToponym, COMMON_TOPONYM_COLLECTION, pageNumber)
)
}

await Promise.all(commonToponymsExportPromises)

// Address
// Count the total number of addresses and pages to process
const totalAddressRecords = await Address.count({
where: {districtID},
transaction,
})
const totalAddressPages = Math.ceil(totalAddressRecords / pageSize)
const totalAddressPages = Math.ceil(totalAddressRecords / PAGE_SIZE)

const addressesExportPromises = []

for (let pageNumber = 1; pageNumber <= totalAddressPages; pageNumber++) {
addressesExportPromises.push(
fetchAndExportDataFromPage(Address, AddressCollection, pageNumber)
fetchAndExportDataFromPage('adresse', Address, ADDRESS_COLLECTION, pageNumber)
)
}

Expand All @@ -94,8 +246,8 @@ export default async function exportToExploitationDB({data}) {

// Remove everything previously exported for a district from the target collections:
// the district document itself (matched on `id`) and all common toponyms and addresses
// carrying this `districtID`. The three deletions are issued together and awaited as a whole.
const deleteAllDataRelatedToDistrict = async districtID => {
  await Promise.all([
    mongo.db.collection(DISTRICT_COLLECTION).deleteOne({id: districtID}),
    mongo.db.collection(COMMON_TOPONYM_COLLECTION).deleteMany({districtID}),
    mongo.db.collection(ADDRESS_COLLECTION).deleteMany({districtID})
  ])
}

0 comments on commit 001d625

Please sign in to comment.