Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Read agendas and insert into table reunions #5

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions releve_db_cli/NOTES_ANALYSE_TRICOTEUSES.md
Original file line number Diff line number Diff line change
@@ -8,6 +8,10 @@

# Autres liens

Des infos sur les différents datasets dans ce fichier
https://git.en-root.org/tricoteuses/tricoteuses-assemblee/-/blob/master/src/datasets.ts
Notamment quels datasets sont des sous-ensembles de quels autres datasets, quels sont leurs avantages ou inconvénients, etc.

Schemas JSON
https://git.en-root.org/tricoteuses/tricoteuses-assemblee/-/tree/master/src/schemas
Schémas très utiles car il y a plein de commentaires explicatifs sur chaque champ
@@ -16,6 +20,14 @@ Doc des schémas de l'assemblée
https://www.assemblee-nationale.fr/opendata/Schemas_Entites/AMO/Schemas_Organes.html#type-organe-parlemtaire
à explorer, mais tout n'est pas documenté apparemment



https://framagit.org/parlement-ouvert/schemas-assemblee/-/tree/master
Les Schémas XML des données de l'Assemblée nationale
À l'origine, ces schémas, fournis par l'Assemblée, décrivent un sur-ensemble des données publiées en open data sur le site de l'Assemblée nationale.
Ces schémas ont ensuite été mis à jour afin de pouvoir valider l'open data de l'Assemblée et ainsi tenter de détecter des changements dans les données.


La pipeline des Tricoteuses est expliquée dans un commentaire en bas de cette discussion
https://forum.en-root.org/t/evolution-de-la-chaine-de-nettoyage-des-donnees-brutes-de-lassemblee/84/9

@@ -162,3 +174,12 @@ https://git.en-root.org/Seb35/duralex-js
(lien trouvé dans le forum, je le note car ça pourra être utile un jour)

DuraLex is a grammar and a framework to convert legal modifying texts (like amendments and most bills articles) into an Abstract Syntax Tree – or more exactly a semantic tree – so that some automatic treatments become possible like creating diffs between the existing law and the proposed amended text.


Liste de projets/outils liés au droit, à la loi
https://forum.parlement-ouvert.fr/t/cartographie-des-outils-libres/582/3
https://revolunet.github.io/droit-libre/#0

forum de parlement ouvert, je crois que c'est lié à Paula Forteza
https://forum.parlement-ouvert.fr/
https://framagit.org/parlement-ouvert
2 changes: 2 additions & 0 deletions releve_db_cli/package.json
Original file line number Diff line number Diff line change
@@ -15,12 +15,14 @@
"@tsconfig/node18": "^1.0.1",
"@types/command-line-args": "^5.2.0",
"@types/command-line-usage": "^5.0.2",
"@types/glob": "^8.0.0",
"@types/lodash": "^4.14.190",
"@types/node": "^18.11.9",
"@types/pg": "^8.6.5",
"command-line-args": "^5.2.1",
"command-line-usage": "^6.1.3",
"dotenv": "^16.0.3",
"glob": "^8.0.3",
"kysely": "^0.22.0",
"lodash": "^4.17.21",
"node-fetch": "^3.3.0",
20 changes: 19 additions & 1 deletion releve_db_cli/sql/db_tables.sql
Original file line number Diff line number Diff line change
@@ -19,8 +19,26 @@ CREATE TABLE mandats (
organes_uids TEXT[] NOT NULL
);

-- Agenda dataset: committee meetings, plenary sittings, any other kind of
-- meeting between members of parliament, etc.
-- One row per (meeting uid, legislature of the dataset the file was found in).
DROP TABLE IF EXISTS reunions;
CREATE TABLE reunions (
uid text NOT NULL,
data jsonb NOT NULL,
-- Legislature of the dataset, i.e. 15 for meetings found in the Agenda_XV dataset.
-- NOTE(review): meetings apparently can overlap from one dataset to the next at the
-- start/end of a legislature, so the reliability of this value is still to be verified.
legislature INT NOT NULL,
-- Path the file had inside the dataset (without the file name),
-- for example SN/R5/L15/S2022/IDS/000/025.
-- Probably not useful: all of this information is presumably available in the JSON itself.
-- Kept for now so that this assumption can be checked.
path_in_dataset TEXT NOT NULL,
-- The same meeting therefore sometimes seems to appear in several legislatures.
-- Handled with a composite key for now; the rows could probably be deduplicated
-- later by keeping only the latest version.
PRIMARY KEY (uid, legislature)
);

-- Deputies as referenced by NosDeputes: one row per uid, each with a unique slug.
-- NOTE(review): uid presumably matches the acteur uid used in the other tables — confirm.
DROP TABLE IF EXISTS nosdeputes_deputes;
CREATE TABLE nosdeputes_deputes (
  uid text PRIMARY KEY NOT NULL,
  slug text NOT NULL UNIQUE
);

51 changes: 27 additions & 24 deletions releve_db_cli/src/sandbox.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,35 @@
import fs from 'fs'
import { CliArgs } from './utils/cli'
import _ from 'lodash'
import path from 'path'
import { AM030 } from './utils/datasets'
import { readFileAsJson, readFilesInSubdir } from './utils/utils'
import { sql } from 'kysely'
import { getDb } from './utils/db'
import { listFilesRecursively, readFileAsJson } from './utils/utils'

// Exemple de commande jq pour explorer les fichiers JSON clonés des tricoteuses en ligne de commande
// find ../data.tricoteuses.fr/Agenda_XIV/ -name '*.json' | xargs jq 'select(.timestampDebut < "2017-06-21") | "\(.uid) \(.timestampDebut)"'

export function sandbox(args: CliArgs) {
const { workdir } = args

const datasetPath = './tmp/tricoteuses/Agenda_XV'

const files = listFilesRecursively(datasetPath)
for (const f of files) {
const json = readFileAsJson(f)
const pathInDataset = f
.substring(datasetPath.length + 1)
.replace(/\/[^/]*\.json$/, '')
}

// XVI

// dossiers qui changent :
// SN ou AN
// l'année
// le 024-25-etc.
//
// ./tmp/tricoteuses/Agenda_XV/AN/R5/L15/S2021/IDS/000/024/RUANR5L15S2021IDS24341.json
// ./tmp/tricoteuses/Agenda_XV/SN/R5/L15/S2022/IDS/000/025/RUSNR5L15S2022IDS25657.json

// XVI: toutes les séances sont dans un path comme ça
// ./tmp/tricoteuses/Agenda_XVI/AN/R5/L16/S2022/IDS/000/026/RUANR5L16S2022IDS26177.json

// TODO check if the same depute in NosDeputes can have a different slug in different legislatures
// ==> OUI examples

@@ -25,23 +45,6 @@ export function sandbox(args: CliArgs) {
// Donc il faudra prendre le slug pour chaque député dans leur dernière législature

// TODO reorganize that and put it in the CLI. Put the NosDeputes files in subfolder in ./tmp

const deputesWithLegislature = [13, 14, 15, 16].flatMap(legislature => {
const deputes = readDeputesJsonFromNosDeputes(legislature)
return deputes.map(depute => ({ ...depute, legislature }))
})

const subdir = path.join(workdir, AM030, 'acteurs')
const files = readFilesInSubdir(subdir)
for (const f of files) {
const json = readFileAsJson(path.join(subdir, f))
}

deputesWithLegislature.forEach(depute => {
if (!files.includes(`PA${depute.id_an}.json`)) {
console.log('not found', depute.id_an, depute.slug, depute.legislature)
}
})
}

function readDeputesJsonFromNosDeputes(legislature: number) {
36 changes: 35 additions & 1 deletion releve_db_cli/src/tricoteuses/tricoteusesInsert.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import path from 'path'
import { rewriteAdresses } from '../nosdeputes/rewriteAdresses'
import { CliArgs } from '../utils/cli'
import { AM030 } from '../utils/datasets'
import { AGENDA_14, AGENDA_15, AGENDA_16, AM030 } from '../utils/datasets'
import { getDb } from '../utils/db'
import {
listFilesRecursively,
readFileAsJson,
readFilesInSubdir,
truncateTable,
@@ -13,6 +14,7 @@ export async function tricoteusesInsert(args: CliArgs) {
await insertAllActeursOfAm030(args)
await insertAllOrganesOfAm030(args)
await insertAllMandatsOfAm030(args)
await insertAllFromAgendas(args)
}

function getAm030Path(args: CliArgs) {
@@ -87,3 +89,35 @@ async function insertAllMandatsOfAm030(args: CliArgs) {
}
console.log('Done')
}

/**
 * Empties the `reunions` table, then fills it again from the three Agenda
 * datasets (legislatures 14, 15 and 16) located under `<workdir>/tricoteuses`.
 *
 * Every file found (recursively) in a dataset directory is parsed as JSON and
 * inserted as one row holding its `uid`, the legislature of the dataset, the
 * directory of the file relative to the dataset root, and the whole JSON.
 *
 * @param args CLI arguments; only `workdir` is read here.
 */
async function insertAllFromAgendas(args: CliArgs) {
  const table = 'reunions'
  // truncateTable is async: without `await` the TRUNCATE is a floating promise
  // that can run after some inserts (wiping them) and whose failure is lost.
  await truncateTable(table)
  const datasetsAndLegislature = [
    [AGENDA_14, 14],
    [AGENDA_15, 15],
    [AGENDA_16, 16],
  ] as const
  for (const [dataset, legislature] of datasetsAndLegislature) {
    const datasetPath = path.join(args.workdir, 'tricoteuses', dataset)
    const files = listFilesRecursively(datasetPath)
    console.log(`Inserting these into table ${table}`)
    for (const f of files) {
      // Strip the dataset root (and its trailing separator), then the file
      // name, e.g. ".../Agenda_XV/SN/R5/x.json" -> "SN/R5".
      const path_in_dataset = f
        .substring(datasetPath.length + 1)
        .replace(/\/[^/]*\.json$/, '')
      const json = readFileAsJson(f)
      // NOTE(review): assumes every agenda file has a string `uid` — not validated here.
      const uid = json.uid as string
      await getDb()
        .insertInto(table)
        .values({
          uid,
          path_in_dataset,
          legislature,
          data: json,
        })
        .execute()
    }
    console.log('Done')
  }
}
9 changes: 9 additions & 0 deletions releve_db_cli/src/utils/datasets.ts
Original file line number Diff line number Diff line change
@@ -5,6 +5,10 @@ export const AM030_16 =
'AMO40_deputes_actifs_mandats_actifs_organes_divises_XVI'
export const AM030_15 = 'AMO40_deputes_actifs_mandats_actifs_organes_divises_XV'

// Names of the Agenda datasets, one per legislature (XIV, XV, XVI).
// The numeric suffix of each constant is the legislature number.
export const AGENDA_14 = 'Agenda_XIV'
export const AGENDA_15 = 'Agenda_XV'
export const AGENDA_16 = 'Agenda_XVI'

export const datasetsForRegardsCitoyens = [
// contient tous les acteurs + organes avec historique
AM030,
@@ -20,6 +24,11 @@ export const datasetsForRegardsCitoyens = [
AM030_16,
// idem
AM030_15,

// toutes les réunions et séances (mais pas ce qui s'est dit, ni les participants ?)
AGENDA_14,
AGENDA_15,
AGENDA_16,
]

// Autres repos qu'il faudra ensuite surement intégrer :
6 changes: 6 additions & 0 deletions releve_db_cli/src/utils/db.ts
Original file line number Diff line number Diff line change
@@ -40,6 +40,12 @@ export interface NosDeputesDatabase {
acteur_uid: string
organes_uids: string[]
}
// Row shape of the `reunions` table.
reunions: {
// Meeting identifier
uid: string
// JSON payload of the row; typed as unknown, so it must be narrowed before use
data: unknown
// Legislature number stored alongside the meeting
legislature: number
// NOTE(review): presumably the directory of the source file inside its dataset — confirm against the insert code
path_in_dataset: string
}
nosdeputes_deputes: {
uid: string
slug: string
10 changes: 10 additions & 0 deletions releve_db_cli/src/utils/utils.ts
Original file line number Diff line number Diff line change
@@ -3,6 +3,7 @@ import fs from 'fs'
import { sql } from 'kysely'
import path from 'path'
import { getDb } from './db'
import glob from 'glob'

export function readFromEnv(name: string): string {
const value = process.env[name]
@@ -68,3 +69,12 @@ export async function truncateTable(tableName: string) {
console.log(`Emptying ${tableName} table`)
await sql`TRUNCATE TABLE ${sql.raw(tableName)}`.execute(getDb())
}

/**
 * Lists every file (directories excluded) found at any depth under `dirPath`.
 *
 * The returned paths are relative to the current working directory, not to
 * `dirPath` itself.
 *
 * @param dirPath directory to scan
 * @returns the paths of all the files found
 */
export function listFilesRecursively(dirPath: string): string[] {
  console.log(`Reading files in ${dirPath} recursively`)
  const pattern = `${dirPath}/**/*`
  const matches = glob.sync(pattern, { nodir: true })
  const count = matches.length
  console.log(`${count} files found`)
  return matches
}
73 changes: 73 additions & 0 deletions releve_db_cli/yarn.lock
Original file line number Diff line number Diff line change
@@ -62,11 +62,24 @@
resolved "https://registry.yarnpkg.com/@types/command-line-usage/-/command-line-usage-5.0.2.tgz#ba5e3f6ae5a2009d466679cc431b50635bf1a064"
integrity sha512-n7RlEEJ+4x4TS7ZQddTmNSxP+zziEG0TNsMfiRIxcIVXt71ENJ9ojeXmGO3wPoTdn7pJcU2xc3CJYMktNT6DPg==

"@types/glob@^8.0.0":
version "8.0.0"
resolved "https://registry.yarnpkg.com/@types/glob/-/glob-8.0.0.tgz#321607e9cbaec54f687a0792b2d1d370739455d2"
integrity sha512-l6NQsDDyQUVeoTynNpC9uRvCUint/gSUXQA2euwmTuWGvPY5LSDUu6tkCtJB2SvGQlJQzLaKqcGZP4//7EDveA==
dependencies:
"@types/minimatch" "*"
"@types/node" "*"

"@types/lodash@^4.14.190":
version "4.14.190"
resolved "https://registry.yarnpkg.com/@types/lodash/-/lodash-4.14.190.tgz#d8e99647af141c63902d0ca53cf2b34d2df33545"
integrity sha512-5iJ3FBJBvQHQ8sFhEhJfjUP+G+LalhavTkYyrAYqz5MEJG+erSv0k9KJLb6q7++17Lafk1scaTIFXcMJlwK8Mw==

"@types/minimatch@*":
version "5.1.2"
resolved "https://registry.yarnpkg.com/@types/minimatch/-/minimatch-5.1.2.tgz#07508b45797cb81ec3f273011b054cd0755eddca"
integrity sha512-K0VQKziLUWkVKiRVrx4a40iPaxTUefQmjtkQofBkYRcoaaL/8rhwDWww9qWbrgicNOgnpIsMxyNIUM4+n6dUIA==

"@types/node@*", "@types/node@^18.11.9":
version "18.11.9"
resolved "https://registry.yarnpkg.com/@types/node/-/node-18.11.9.tgz#02d013de7058cea16d36168ef2fc653464cfbad4"
@@ -113,6 +126,18 @@ array-back@^4.0.1, array-back@^4.0.2:
resolved "https://registry.yarnpkg.com/array-back/-/array-back-4.0.2.tgz#8004e999a6274586beeb27342168652fdb89fa1e"
integrity sha512-NbdMezxqf94cnNfWLL7V/im0Ub+Anbb0IoZhvzie8+4HJ4nMQuzHuy49FkGYCJK2yAloZ3meiB6AVMClbrI1vg==

balanced-match@^1.0.0:
version "1.0.2"
resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.2.tgz#e83e3a7e3f300b34cb9d87f615fa0cbf357690ee"
integrity sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==

brace-expansion@^2.0.1:
version "2.0.1"
resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-2.0.1.tgz#1edc459e0f0c548486ecf9fc99f2221364b9a0ae"
integrity sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==
dependencies:
balanced-match "^1.0.0"

[email protected]:
version "2.0.0"
resolved "https://registry.yarnpkg.com/buffer-writer/-/buffer-writer-2.0.0.tgz#ce7eb81a38f7829db09c873f2fbb792c0c98ec04"
@@ -211,11 +236,40 @@ formdata-polyfill@^4.0.10:
dependencies:
fetch-blob "^3.1.2"

fs.realpath@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/fs.realpath/-/fs.realpath-1.0.0.tgz#1504ad2523158caa40db4a2787cb01411994ea4f"
integrity sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==

glob@^8.0.3:
version "8.0.3"
resolved "https://registry.yarnpkg.com/glob/-/glob-8.0.3.tgz#415c6eb2deed9e502c68fa44a272e6da6eeca42e"
integrity sha512-ull455NHSHI/Y1FqGaaYFaLGkNMMJbavMrEGFXG/PGrg6y7sutWHUHrz6gy6WEBH6akM1M414dWKCNs+IhKdiQ==
dependencies:
fs.realpath "^1.0.0"
inflight "^1.0.4"
inherits "2"
minimatch "^5.0.1"
once "^1.3.0"

has-flag@^3.0.0:
version "3.0.0"
resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-3.0.0.tgz#b5d454dc2199ae225699f3467e5a07f3b955bafd"
integrity sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==

inflight@^1.0.4:
version "1.0.6"
resolved "https://registry.yarnpkg.com/inflight/-/inflight-1.0.6.tgz#49bd6331d7d02d0c09bc910a1075ba8165b56df9"
integrity sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==
dependencies:
once "^1.3.0"
wrappy "1"

inherits@2:
version "2.0.4"
resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c"
integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==

kysely@^0.22.0:
version "0.22.0"
resolved "https://registry.yarnpkg.com/kysely/-/kysely-0.22.0.tgz#8aac53942da3cadc604d7d154a746d983fe8f7b9"
@@ -236,6 +290,13 @@ make-error@^1.1.1:
resolved "https://registry.yarnpkg.com/make-error/-/make-error-1.3.6.tgz#2eb2e37ea9b67c4891f684a1394799af484cf7a2"
integrity sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==

minimatch@^5.0.1:
version "5.1.1"
resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-5.1.1.tgz#6c9dffcf9927ff2a31e74b5af11adf8b9604b022"
integrity sha512-362NP+zlprccbEt/SkxKfRMHnNY85V74mVnpUpNyr3F35covl09Kec7/sEFLt3RA4oXmewtoaanoIf67SE5Y5g==
dependencies:
brace-expansion "^2.0.1"

node-domexception@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/node-domexception/-/node-domexception-1.0.0.tgz#6888db46a1f71c0b76b3f7555016b63fe64766e5"
@@ -250,6 +311,13 @@ node-fetch@^3.3.0:
fetch-blob "^3.1.4"
formdata-polyfill "^4.0.10"

once@^1.3.0:
version "1.4.0"
resolved "https://registry.yarnpkg.com/once/-/once-1.4.0.tgz#583b1aa775961d4b113ac17d9c50baef9dd76bd1"
integrity sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==
dependencies:
wrappy "1"

[email protected]:
version "1.0.0"
resolved "https://registry.yarnpkg.com/packet-reader/-/packet-reader-1.0.0.tgz#9238e5480dedabacfe1fe3f2771063f164157d74"
@@ -417,6 +485,11 @@ wordwrapjs@^4.0.0:
reduce-flatten "^2.0.0"
typical "^5.2.0"

wrappy@1:
version "1.0.2"
resolved "https://registry.yarnpkg.com/wrappy/-/wrappy-1.0.2.tgz#b5243d8f3ec1aa35f1364605bc0d1036e30ab69f"
integrity sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==

xtend@^4.0.0:
version "4.0.2"
resolved "https://registry.yarnpkg.com/xtend/-/xtend-4.0.2.tgz#bb72779f5fa465186b1f438f674fa347fdb5db54"