From 08affedfdbf2483f9fede4e68ef512523df34f44 Mon Sep 17 00:00:00 2001 From: Nicolas Thouvenin Date: Mon, 4 Sep 2023 12:09:49 +0200 Subject: [PATCH 1/5] Update Inspire Loader --- workers/loaders/json-inspirehep.ini | 171 +++++++++++++--------------- 1 file changed, 77 insertions(+), 94 deletions(-) diff --git a/workers/loaders/json-inspirehep.ini b/workers/loaders/json-inspirehep.ini index 4346f2dfb..87d4b9c6c 100644 --- a/workers/loaders/json-inspirehep.ini +++ b/workers/loaders/json-inspirehep.ini @@ -1,94 +1,77 @@ -#loader json-inspirehep.ini (version novembre 2022 - Inist-CNRS) - -append = pack -label = json-inspirehep - -# load some plugins to activate some statements -[use] -plugin = basics - -# Toggle ezs traces (see server stderr log) -[debug] -ezs = false - -[JSONParse] -separator = hits.hits.* - -[replace] -path = id -value = get('id') - -path = Titre -value = get('metadata.titles[0].title') - -path = Type de document -value = get('metadata.document_type') - -path = Année de publication -value = get('metadata.publication_info[0].year') - -path = Revue -value = get('metadata.publication_info[0].journal_title') - -path = Issue -value = get('metadata.publication_info[0].journal_issue') - -path = N° de volume -value = get('metadata.publication_info[0].journal_volume') - -path = N° conférence -value = get('metadata.publication_info[0].cnum') - -path = Auteurs -value = get('metadata.authors').map(author => ({ full_name: author.full_name, affiliations: author.affiliations ? author.affiliations.map(aff => ({ label: aff.value || "", url: ( aff.record ? 
aff.record["$ref"] : "q" ) }) ) : [] } )) - -path = Catégorie inspire -value = get('metadata.inspire_categories').map(cat => cat.term).uniq() - -path = Expériences -value = get('metadata.accelerator_experiments').map(equ => equ.legacy_name).uniq() - -path = Collaborations -value = get('metadata.collaborations').map(col => col.value).uniq() - -path = Résumé -value = get('metadata.abstracts[0].value') - -path = DOI -value = get('metadata.dois').map(doi => doi.value).uniq() - -[assign] -path = UrlsLabo -value = get('Auteurs').map(author => author.affiliations.map(aff => aff.url)).flatten().uniq() - -[assign] -path = codesLabos -value = get("UrlsLabo").map(url => String(url).split("/").slice(-1)[0]) - -[assign] -path = uri -value = get('id') - -# Ensures that each object contains an identification key (required by lodex) -[swing] -test = pick(['URI', 'uri']).pickBy(_.identity).isEmpty() -[swing/identify] - -# Prevent keys from containing dot path notation (which is forbidden by nodejs mongoDB driver) -[OBJFlatten] -separator = fix('.') -reverse = true -safe = true - -# Uncomment to see each data sent to the database -#[debug] - -# Add contextual metadata related to the import -[assign] -path = lodexStamp.importedDate -value = fix(new Date()).thru(d => d.toDateString()) -path = lodexStamp.usedParser -value = env('parser') -path = lodexStamp.uploadedFilename -value = env('source') - +#loader json-inspirehep.ini (version juin 2023 - Inist-CNRS) + +append = dump +label = json-inspirehep + +[use] +plugin = basics +plugin = lodex + +[JSONParse] +separator = hits.hits.* + +[replace] +path = id +value = get('id') + +path = Titre +value = get('metadata.titles[0].title') +#1e occurrence + +path = Type de document +value = get('metadata.document_type') + +path = Année de publication +value = get('metadata.publication_info[0].year') +#1e occurrence + +path = Revue +value = get('metadata.publication_info[0].journal_title') +#1e occurrence + +path = Issue +value = 
get('metadata.publication_info[0].journal_issue') +#1e occurrence + +path = N° de volume +value = get('metadata.publication_info[0].journal_volume') +#1e occurrence + +path = N° conférence +value = get('metadata.publication_info[0].cnum') +#1e occurrence + +path = Auteurs +value = get('metadata.authors').map(author => ({ full_name: author.full_name, affiliations: author.affiliations ? author.affiliations.map(aff => ({ label: aff.value || "", url: ( aff.record ? aff.record["$ref"] : "empty" ) }) ) : [] } )) + +path = Catégorie inspire +value = get('metadata.inspire_categories').map(cat => cat.term).uniq() + +path = Expériences +value = get('metadata.accelerator_experiments').map(equ => equ.legacy_name).uniq() + +path = Collaborations +value = get('metadata.collaborations').map(col => col.value).uniq() + +path = Résumé +value = get('metadata.abstracts[0].value') +#1e occurrence + +path = DOI +value = get('metadata.dois').map(doi => doi.value).uniq() + +[assign] +path = UrlsLabo +value = get('Auteurs').map(author => author.affiliations.filter(aff => aff.url!=="empty").map(aff => aff.url)).flatten().uniq() + +[assign] +path = codesLabos +value = get("UrlsLabo").map(url => url.split("/").slice(-1)[0]) + +[OBJFlatten] +separator = / + +[assign] +path = uri +value = get('id') +#constitution identifiant URI LOdex à partir de l'ID From 37d0085b74ccdd37499effcb31c193390aef29f0 Mon Sep 17 00:00:00 2001 From: Nicolas Thouvenin Date: Mon, 4 Sep 2023 15:51:56 +0200 Subject: [PATCH 2/5] Update workers/loaders/json-inspirehep.ini MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: François Parmentier --- workers/loaders/json-inspirehep.ini | 1 - 1 file changed, 1 deletion(-) diff --git a/workers/loaders/json-inspirehep.ini b/workers/loaders/json-inspirehep.ini index 87d4b9c6c..09b9efb33 100644 --- a/workers/loaders/json-inspirehep.ini +++ b/workers/loaders/json-inspirehep.ini @@ -5,7 +5,6 @@ label = json-inspirehep [use] plugin 
= basics -plugin = lodex [JSONParse] separator = hits.hits.* From e140592603e7ea3ce512c68a061c9ed94e677340 Mon Sep 17 00:00:00 2001 From: Nicolas Thouvenin Date: Mon, 4 Sep 2023 16:27:00 +0200 Subject: [PATCH 3/5] Update json-inspirehep.ini Mandatory statements --- workers/loaders/json-inspirehep.ini | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/workers/loaders/json-inspirehep.ini b/workers/loaders/json-inspirehep.ini index 09b9efb33..86171d20c 100644 --- a/workers/loaders/json-inspirehep.ini +++ b/workers/loaders/json-inspirehep.ini @@ -74,3 +74,26 @@ separator = / path = uri value = get('id') #constitution identifiant URI LOdex à partir de l'ID + +# Ensures that each object contains an identification key (required by lodex) +[swing] +test = pick(['URI', 'uri']).pickBy(_.identity).isEmpty() +[swing/identify] + +# Prevent keys from containing dot path notation (which is forbidden by nodejs mongoDB driver) +[OBJFlatten] +separator = fix('.') +reverse = true +safe = true + +# Uncomment to see each data sent to the database +#[debug] + +# Add contextual metadata related to the import +[assign] +path = lodexStamp.importedDate +value = fix(new Date()).thru(d => d.toDateString()) +path = lodexStamp.usedParser +value = env('parser') +path = lodexStamp.uploadedFilename +value = env('source') From f496dd562947c73563df185cb49ae125a30b636f Mon Sep 17 00:00:00 2001 From: Nicolas Thouvenin Date: Mon, 4 Sep 2023 16:28:27 +0200 Subject: [PATCH 4/5] unix format --- workers/loaders/json-inspirehep.ini | 198 ++++++++++++++-------------- 1 file changed, 99 insertions(+), 99 deletions(-) diff --git a/workers/loaders/json-inspirehep.ini b/workers/loaders/json-inspirehep.ini index 86171d20c..033b7b98e 100644 --- a/workers/loaders/json-inspirehep.ini +++ b/workers/loaders/json-inspirehep.ini @@ -1,99 +1,99 @@ -#loader json-inspirehep.ini (version juin 2023 - Inist-CNRS) - -append = dump -label = json-inspirehep - -[use] -plugin = basics - -[JSONParse] 
-separator = hits.hits.* - -[replace] -path = id -value = get('id') - -path = Titre -value = get('metadata.titles[0].title') -#1e occurrence - -path = Type de document -value = get('metadata.document_type') - -path = Année de publication -value = get('metadata.publication_info[0].year') -#1e occurrence - -path = Revue -value = get('metadata.publication_info[0].journal_title') -#1e occurrence - -path = Issue -value = get('metadata.publication_info[0].journal_issue') -#1e occurrence - -path = N° de volume -value = get('metadata.publication_info[0].journal_volume') -#1e occurrence - -path = N° conférence -value = get('metadata.publication_info[0].cnum') -#1e occurrence - -path = Auteurs -value = get('metadata.authors').map(author => ({ full_name: author.full_name, affiliations: author.affiliations ? author.affiliations.map(aff => ({ label: aff.value || "", url: ( aff.record ? aff.record["$ref"] : "empty" ) }) ) : [] } )) - -path = Catégorie inspire -value = get('metadata.inspire_categories').map(cat => cat.term).uniq() - -path = Expériences -value = get('metadata.accelerator_experiments').map(equ => equ.legacy_name).uniq() - -path = Collaborations -value = get('metadata.collaborations').map(col => col.value).uniq() - -path = Résumé -value = get('metadata.abstracts[0].value') -#1e occurrence - -path = DOI -value = get('metadata.dois').map(doi => doi.value).uniq() - -[assign] -path = UrlsLabo -value = get('Auteurs').map(author => author.affiliations.filter(aff => aff.url!=="empty").map(aff => aff.url)).flatten().uniq() - -[assign] -path = codesLabos -value = get("UrlsLabo").map(url => url.split("/").slice(-1)[0]) - -[OBJFlatten] -separator = / - -[assign] -path = uri -value = get('id') -#constitution identifiant URI LOdex à partir de l'ID - -# Ensures that each object contains an identification key (required by lodex) -[swing] -test = pick(['URI', 'uri']).pickBy(_.identity).isEmpty() -[swing/identify] - -# Prevent keys from containing dot path notation (which is 
forbidden by nodejs mongoDB driver) -[OBJFlatten] -separator = fix('.') -reverse = true -safe = true - -# Uncomment to see each data sent to the database -#[debug] - -# Add contextual metadata related to the import -[assign] -path = lodexStamp.importedDate -value = fix(new Date()).thru(d => d.toDateString()) -path = lodexStamp.usedParser -value = env('parser') -path = lodexStamp.uploadedFilename -value = env('source') +#loader json-inspirehep.ini (version juin 2023 - Inist-CNRS) + +append = dump +label = json-inspirehep + +[use] +plugin = basics + +[JSONParse] +separator = hits.hits.* + +[replace] +path = id +value = get('id') + +path = Titre +value = get('metadata.titles[0].title') +#1e occurrence + +path = Type de document +value = get('metadata.document_type') + +path = Année de publication +value = get('metadata.publication_info[0].year') +#1e occurrence + +path = Revue +value = get('metadata.publication_info[0].journal_title') +#1e occurrence + +path = Issue +value = get('metadata.publication_info[0].journal_issue') +#1e occurrence + +path = N° de volume +value = get('metadata.publication_info[0].journal_volume') +#1e occurrence + +path = N° conférence +value = get('metadata.publication_info[0].cnum') +#1e occurrence + +path = Auteurs +value = get('metadata.authors').map(author => ({ full_name: author.full_name, affiliations: author.affiliations ? author.affiliations.map(aff => ({ label: aff.value || "", url: ( aff.record ? 
aff.record["$ref"] : "empty" ) }) ) : [] } )) + +path = Catégorie inspire +value = get('metadata.inspire_categories').map(cat => cat.term).uniq() + +path = Expériences +value = get('metadata.accelerator_experiments').map(equ => equ.legacy_name).uniq() + +path = Collaborations +value = get('metadata.collaborations').map(col => col.value).uniq() + +path = Résumé +value = get('metadata.abstracts[0].value') +#1e occurrence + +path = DOI +value = get('metadata.dois').map(doi => doi.value).uniq() + +[assign] +path = UrlsLabo +value = get('Auteurs').map(author => author.affiliations.filter(aff => aff.url!=="empty").map(aff => aff.url)).flatten().uniq() + +[assign] +path = codesLabos +value = get("UrlsLabo").map(url => url.split("/").slice(-1)[0]) + +[OBJFlatten] +separator = / + +[assign] +path = uri +value = get('id') +#constitution identifiant URI LOdex à partir de l'ID + +# Ensures that each object contains an identification key (required by lodex) +[swing] +test = pick(['URI', 'uri']).pickBy(_.identity).isEmpty() +[swing/identify] + +# Prevent keys from containing dot path notation (which is forbidden by nodejs mongoDB driver) +[OBJFlatten] +separator = fix('.') +reverse = true +safe = true + +# Uncomment to see each data sent to the database +#[debug] + +# Add contextual metadata related to the import +[assign] +path = lodexStamp.importedDate +value = fix(new Date()).thru(d => d.toDateString()) +path = lodexStamp.usedParser +value = env('parser') +path = lodexStamp.uploadedFilename +value = env('source') From 67fb3e31429a9a5b1e193b746b5db035d6f4ba95 Mon Sep 17 00:00:00 2001 From: Nicolas Thouvenin Date: Mon, 4 Sep 2023 16:29:55 +0200 Subject: [PATCH 5/5] now, lodex uses JSON to exchange data --- workers/loaders/json-inspirehep.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workers/loaders/json-inspirehep.ini b/workers/loaders/json-inspirehep.ini index 033b7b98e..27fa95d59 100644 --- a/workers/loaders/json-inspirehep.ini +++ 
b/workers/loaders/json-inspirehep.ini @@ -1,6 +1,6 @@ #loader json-inspirehep.ini (version juin 2023 - Inist-CNRS) -append = dump +append = pack label = json-inspirehep [use]