Skip to content

Commit

Permalink
Merge pull request #439 from BaseAdresseNationale/jugurtha/integrer-l…
Browse files Browse the repository at this point in the history
…es-contours-postaux-dans-pg

Calcul des codes postaux en utilisant Datanova et les contours postaux
  • Loading branch information
jbouhadoun authored Jul 8, 2024
2 parents 8144be1 + 71409df commit 1997d5a
Show file tree
Hide file tree
Showing 7 changed files with 643 additions and 84 deletions.
2 changes: 2 additions & 0 deletions .env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ GAZETTEER_DB_PATH=data/gazetteer.sqlite
MAJIC_PATH=/data/majic.sqlite
CONTOURS_DATA_PATH=data/communes-50m.sqlite
COMMUNES_LOCAUX_ADRESSES_DATA_PATH=data/communes-locaux-adresses.json
CP_PATH=db-migrations/data/cp_group.geojson
DATANOVA_PATH=db-migrations/data/datanova_06.csv

# Others
DEPARTEMENTS= # Comma separated list of departements for dev only
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,5 @@ typings/
/toolbox.dev/data

# Migration data backup
db-migrations/migrations/data-backup
db-migrations/migrations/data-backup
db-migrations/data/
103 changes: 103 additions & 0 deletions db-migrations/migrations/20240619135943-init-postal-area-table.cjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
'use strict'

const fs = require('fs')
const {Transform} = require('stream')
const JSONStream = require('JSONStream')

const {POSTGRES_BAN_USER} = process.env
const {CP_PATH} = process.env

/** @type {import('sequelize-cli').Migration} */
module.exports = {
async up(queryInterface, Sequelize) {
await queryInterface.sequelize.query('CREATE SCHEMA IF NOT EXISTS external;')
await queryInterface.sequelize.query(`GRANT USAGE ON SCHEMA external TO "${POSTGRES_BAN_USER}";`)

await queryInterface.createTable('postal_area', {
id: {
type: Sequelize.INTEGER,
primaryKey: true,
autoIncrement: true,
allowNull: false,
},
postalCode: {
type: Sequelize.STRING,
allowNull: false,
},
inseeCom: {
type: Sequelize.STRING,
allowNull: false,
},
geometry: {
type: Sequelize.GEOMETRY,
allowNull: false,
},
createdAt: {
type: Sequelize.DATE,
allowNull: true,
},
updatedAt: {
type: Sequelize.DATE,
allowNull: true,
},
}, {
schema: 'external',
ifNotExists: true,
})

await queryInterface.sequelize.query(`
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW."updatedAt" = NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
`)

await queryInterface.sequelize.query(`
CREATE TRIGGER update_postal_area_updated_at
BEFORE UPDATE ON external.postal_area
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
`)

const {sequelize} = queryInterface

const insertFeature = async feature => {
const {cp: postalCode, insee_com: inseeCom} = feature.properties
const geom = JSON.stringify(feature.geometry)
const query = `
INSERT INTO external.postal_area ("postalCode", "inseeCom", geometry, "createdAt", "updatedAt")
VALUES ($1, $2, ST_SetSRID(ST_GeomFromGeoJSON($3), 2154), NOW(), NOW())
`
await sequelize.query(query, {
bind: [postalCode, inseeCom, geom],
})
}

const stream = fs.createReadStream(CP_PATH)
.pipe(JSONStream.parse('features.*'))
.pipe(new Transform({
objectMode: true,
async transform(feature, encoding, callback) {
try {
await insertFeature(feature)
callback()
} catch (error) {
callback(error)
}
},
}))
return new Promise((resolve, reject) => {
stream.on('finish', resolve)
stream.on('error', reject)
})
},

async down(queryInterface, _Sequelize) {
await queryInterface.dropTable({tableName: 'postal_area', schema: 'external'})
await queryInterface.sequelize.query('DROP FUNCTION IF EXISTS update_updated_at_column() CASCADE;')
await queryInterface.sequelize.query('DROP SCHEMA IF EXISTS external CASCADE;')
},
}
140 changes: 140 additions & 0 deletions db-migrations/migrations/20240621094048-init-datanova-table.cjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
'use strict'

const fs = require('fs')
const path = require('path')
const Papa = require('papaparse')

const {DATANOVA_PATH} = process.env

module.exports = {
async up(queryInterface, Sequelize) {
const transaction = await queryInterface.sequelize.transaction()
try {
await queryInterface.sequelize.query('CREATE SCHEMA IF NOT EXISTS external', {transaction})

const {POSTGRES_BAN_USER} = process.env
await queryInterface.sequelize.query(`GRANT USAGE ON SCHEMA external TO "${POSTGRES_BAN_USER}"`, {transaction})

await queryInterface.createTable({
schema: 'external',
tableName: 'datanova'
}, {
id: {
type: Sequelize.INTEGER,
primaryKey: true,
autoIncrement: true
},
inseeCom: {
type: Sequelize.STRING,
allowNull: false,
},
postalCodes: {
type: Sequelize.ARRAY(Sequelize.STRING),
allowNull: false,
},
libelleAcheminementWithPostalCodes: {
type: Sequelize.TEXT,
allowNull: false,
},
createdAt: {
type: Sequelize.DATE,
allowNull: false,
defaultValue: Sequelize.fn('now')
},
updatedAt: {
type: Sequelize.DATE,
allowNull: false,
defaultValue: Sequelize.fn('now')
}
}, {transaction})

const csvFilePath = path.resolve(DATANOVA_PATH)

const csvFileContent = fs.readFileSync(csvFilePath, 'utf8')

console.log('CSV file read successfully')

const dataRaw = Papa.parse(csvFileContent, {
header: true,
transformHeader(name) {
switch (name.toLowerCase()) {
case 'code_commune_insee':
return 'codeInsee'
case 'nom_de_la_commune':
return 'nomCommune'
case 'code_postal':
return 'codePostal'
case 'libelle_d_acheminement':
return 'libelleAcheminement'
case 'ligne_5':
return 'ligne5'
case '_geopoint':
return 'geopoint'
default:
return name
}
},
skipEmptyLines: true,
})

console.log('CSV file parsed successfully')

const inseeDataMap = dataRaw.data.reduce((acc, {codeInsee, codePostal, libelleAcheminement}) => {
if (!acc[codeInsee]) {
acc[codeInsee] = {
inseeCom: codeInsee,
postalCodes: new Set(),
libelleAcheminementWithPostalCodes: {},
createdAt: new Date(),
updatedAt: new Date(),
}
}

acc[codeInsee].postalCodes.add(codePostal)
if (!acc[codeInsee].libelleAcheminementWithPostalCodes[codePostal]) {
acc[codeInsee].libelleAcheminementWithPostalCodes[codePostal] = libelleAcheminement
}

return acc
}, {})

const formattedData = Object.values(inseeDataMap).map(entry => ({
...entry,
postalCodes: [...entry.postalCodes],
libelleAcheminementWithPostalCodes: JSON.stringify(entry.libelleAcheminementWithPostalCodes)
}))

await queryInterface.bulkInsert({schema: 'external', tableName: 'datanova'}, formattedData, {transaction})
console.log('Data inserted successfully into external.datanova table')

// Convert the column to JSONB after insertion
await queryInterface.sequelize.query(`
ALTER TABLE external.datanova
ALTER COLUMN "libelleAcheminementWithPostalCodes"
TYPE JSONB USING "libelleAcheminementWithPostalCodes"::JSONB
`, {transaction})
console.log('Column libelleAcheminementWithPostalCodes converted to JSONB')

await transaction.commit()
} catch (error) {
await transaction.rollback()
console.error('Error during migration:', error)
}
},

async down(queryInterface) {
const transaction = await queryInterface.sequelize.transaction()
try {
await queryInterface.dropTable({schema: 'external', tableName: 'datanova'}, {transaction})
console.log('Table external.datanova dropped successfully')

await queryInterface.sequelize.query('DROP SCHEMA IF EXISTS external CASCADE', {transaction})
console.log('Schema external dropped successfully')

await transaction.commit()
} catch (error) {
await transaction.rollback()
console.error('Error during migration rollback:', error)
}
}
}
Loading

0 comments on commit 1997d5a

Please sign in to comment.