Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Calcul des codes postaux en utilisant Datanova et les contours postaux #439

Merged
merged 9 commits into from
Jul 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ GAZETTEER_DB_PATH=data/gazetteer.sqlite
MAJIC_PATH=/data/majic.sqlite
CONTOURS_DATA_PATH=data/communes-50m.sqlite
COMMUNES_LOCAUX_ADRESSES_DATA_PATH=data/communes-locaux-adresses.json
CP_PATH=db-migrations/data/cp_group.geojson
DATANOVA_PATH=db-migrations/data/datanova_06.csv

# Others
DEPARTEMENTS= # Comma separated list of departements for dev only
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,5 @@ typings/
/toolbox.dev/data

# Migration data backup
db-migrations/migrations/data-backup
db-migrations/migrations/data-backup
db-migrations/data/
103 changes: 103 additions & 0 deletions db-migrations/migrations/20240619135943-init-postal-area-table.cjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
'use strict'

const fs = require('fs')
const {Transform} = require('stream')
const JSONStream = require('JSONStream')

const {POSTGRES_BAN_USER} = process.env
const {CP_PATH} = process.env

/** @type {import('sequelize-cli').Migration} */
module.exports = {
async up(queryInterface, Sequelize) {
await queryInterface.sequelize.query('CREATE SCHEMA IF NOT EXISTS external;')
await queryInterface.sequelize.query(`GRANT USAGE ON SCHEMA external TO "${POSTGRES_BAN_USER}";`)

await queryInterface.createTable('postal_area', {
id: {
type: Sequelize.INTEGER,
primaryKey: true,
autoIncrement: true,
allowNull: false,
},
postalCode: {
type: Sequelize.STRING,
allowNull: false,
},
inseeCom: {
type: Sequelize.STRING,
allowNull: false,
},
geometry: {
type: Sequelize.GEOMETRY,
allowNull: false,
},
createdAt: {
type: Sequelize.DATE,
allowNull: true,
},
updatedAt: {
type: Sequelize.DATE,
allowNull: true,
},
}, {
schema: 'external',
ifNotExists: true,
})

await queryInterface.sequelize.query(`
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW."updatedAt" = NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
`)

await queryInterface.sequelize.query(`
CREATE TRIGGER update_postal_area_updated_at
BEFORE UPDATE ON external.postal_area
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
`)

const {sequelize} = queryInterface

const insertFeature = async feature => {
const {cp: postalCode, insee_com: inseeCom} = feature.properties
const geom = JSON.stringify(feature.geometry)
const query = `
INSERT INTO external.postal_area ("postalCode", "inseeCom", geometry, "createdAt", "updatedAt")
VALUES ($1, $2, ST_SetSRID(ST_GeomFromGeoJSON($3), 2154), NOW(), NOW())
`
await sequelize.query(query, {
bind: [postalCode, inseeCom, geom],
})
}

const stream = fs.createReadStream(CP_PATH)
.pipe(JSONStream.parse('features.*'))
.pipe(new Transform({
objectMode: true,
async transform(feature, encoding, callback) {
try {
await insertFeature(feature)
callback()
} catch (error) {
callback(error)
}
},
}))
return new Promise((resolve, reject) => {
stream.on('finish', resolve)
stream.on('error', reject)
})
},

async down(queryInterface, _Sequelize) {
await queryInterface.dropTable({tableName: 'postal_area', schema: 'external'})
await queryInterface.sequelize.query('DROP FUNCTION IF EXISTS update_updated_at_column() CASCADE;')
await queryInterface.sequelize.query('DROP SCHEMA IF EXISTS external CASCADE;')
},
}
140 changes: 140 additions & 0 deletions db-migrations/migrations/20240621094048-init-datanova-table.cjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
'use strict'

const fs = require('fs')
const path = require('path')
const Papa = require('papaparse')

const {DATANOVA_PATH} = process.env

module.exports = {
async up(queryInterface, Sequelize) {
const transaction = await queryInterface.sequelize.transaction()
try {
await queryInterface.sequelize.query('CREATE SCHEMA IF NOT EXISTS external', {transaction})

const {POSTGRES_BAN_USER} = process.env
await queryInterface.sequelize.query(`GRANT USAGE ON SCHEMA external TO "${POSTGRES_BAN_USER}"`, {transaction})

await queryInterface.createTable({
schema: 'external',
tableName: 'datanova'
}, {
id: {
type: Sequelize.INTEGER,
primaryKey: true,
autoIncrement: true
},
inseeCom: {
type: Sequelize.STRING,
allowNull: false,
},
postalCodes: {
type: Sequelize.ARRAY(Sequelize.STRING),
allowNull: false,
},
libelleAcheminementWithPostalCodes: {
type: Sequelize.TEXT,
allowNull: false,
},
createdAt: {
type: Sequelize.DATE,
allowNull: false,
defaultValue: Sequelize.fn('now')
},
updatedAt: {
type: Sequelize.DATE,
allowNull: false,
defaultValue: Sequelize.fn('now')
}
}, {transaction})

const csvFilePath = path.resolve(DATANOVA_PATH)

const csvFileContent = fs.readFileSync(csvFilePath, 'utf8')

console.log('CSV file read successfully')

const dataRaw = Papa.parse(csvFileContent, {
header: true,
transformHeader(name) {
switch (name.toLowerCase()) {
case 'code_commune_insee':
return 'codeInsee'
case 'nom_de_la_commune':
return 'nomCommune'
case 'code_postal':
return 'codePostal'
case 'libelle_d_acheminement':
return 'libelleAcheminement'
case 'ligne_5':
return 'ligne5'
case '_geopoint':
return 'geopoint'
default:
return name
}
},
skipEmptyLines: true,
})

console.log('CSV file parsed successfully')

const inseeDataMap = dataRaw.data.reduce((acc, {codeInsee, codePostal, libelleAcheminement}) => {
if (!acc[codeInsee]) {
acc[codeInsee] = {
inseeCom: codeInsee,
postalCodes: new Set(),
libelleAcheminementWithPostalCodes: {},
createdAt: new Date(),
updatedAt: new Date(),
}
}

acc[codeInsee].postalCodes.add(codePostal)
if (!acc[codeInsee].libelleAcheminementWithPostalCodes[codePostal]) {
acc[codeInsee].libelleAcheminementWithPostalCodes[codePostal] = libelleAcheminement
}

return acc
}, {})

const formattedData = Object.values(inseeDataMap).map(entry => ({
...entry,
postalCodes: [...entry.postalCodes],
libelleAcheminementWithPostalCodes: JSON.stringify(entry.libelleAcheminementWithPostalCodes)
}))

await queryInterface.bulkInsert({schema: 'external', tableName: 'datanova'}, formattedData, {transaction})
console.log('Data inserted successfully into external.datanova table')

// Convert the column to JSONB after insertion
await queryInterface.sequelize.query(`
ALTER TABLE external.datanova
ALTER COLUMN "libelleAcheminementWithPostalCodes"
TYPE JSONB USING "libelleAcheminementWithPostalCodes"::JSONB
`, {transaction})
console.log('Column libelleAcheminementWithPostalCodes converted to JSONB')

await transaction.commit()
} catch (error) {
await transaction.rollback()
console.error('Error during migration:', error)
}
},

async down(queryInterface) {
const transaction = await queryInterface.sequelize.transaction()
try {
await queryInterface.dropTable({schema: 'external', tableName: 'datanova'}, {transaction})
console.log('Table external.datanova dropped successfully')

await queryInterface.sequelize.query('DROP SCHEMA IF EXISTS external CASCADE', {transaction})
console.log('Schema external dropped successfully')

await transaction.commit()
} catch (error) {
await transaction.rollback()
console.error('Error during migration rollback:', error)
}
}
}
Loading
Loading