Skip to content

Commit

Permalink
Update Inspire Loader
Browse files Browse the repository at this point in the history
  • Loading branch information
touv committed Sep 4, 2023
1 parent d02f8ce commit 08affed
Showing 1 changed file with 77 additions and 94 deletions.
171 changes: 77 additions & 94 deletions workers/loaders/json-inspirehep.ini
Original file line number Diff line number Diff line change
@@ -1,94 +1,77 @@
#loader json-inspirehep.ini (version novembre 2022 - Inist-CNRS)

append = pack
label = json-inspirehep

# load some plugins to activate some statements
[use]
plugin = basics

# Toggle ezs traces (see server stderr log)
[debug]
ezs = false

[JSONParse]
separator = hits.hits.*

[replace]
path = id
value = get('id')

path = Titre
value = get('metadata.titles[0].title')

path = Type de document
value = get('metadata.document_type')

path = Année de publication
value = get('metadata.publication_info[0].year')

path = Revue
value = get('metadata.publication_info[0].journal_title')

path = Issue
value = get('metadata.publication_info[0].journal_issue')

path = N° de volume
value = get('metadata.publication_info[0].journal_volume')

path = N° conférence
value = get('metadata.publication_info[0].cnum')

path = Auteurs
value = get('metadata.authors').map(author => ({ full_name: author.full_name, affiliations: author.affiliations ? author.affiliations.map(aff => ({ label: aff.value || "", url: ( aff.record ? aff.record["$ref"] : "q" ) }) ) : [] } ))

path = Catégorie inspire
value = get('metadata.inspire_categories').map(cat => cat.term).uniq()

path = Expériences
value = get('metadata.accelerator_experiments').map(equ => equ.legacy_name).uniq()

path = Collaborations
value = get('metadata.collaborations').map(col => col.value).uniq()

path = Résumé
value = get('metadata.abstracts[0].value')

path = DOI
value = get('metadata.dois').map(doi => doi.value).uniq()

[assign]
path = UrlsLabo
value = get('Auteurs').map(author => author.affiliations.map(aff => aff.url)).flatten().uniq()

[assign]
path = codesLabos
value = get("UrlsLabo").map(url => String(url).split("/").slice(-1)[0])

[assign]
path = uri
value = get('id')

# Ensures that each object contains an identification key (required by lodex)
[swing]
test = pick(['URI', 'uri']).pickBy(_.identity).isEmpty()
[swing/identify]

# Prevent keys from containing dot path notation (which is forbidden by nodejs mongoDB driver)
[OBJFlatten]
separator = fix('.')
reverse = true
safe = true

# Uncomment to see each data sent to the database
#[debug]

# Add contextual metadata related to the import
[assign]
path = lodexStamp.importedDate
value = fix(new Date()).thru(d => d.toDateString())
path = lodexStamp.usedParser
value = env('parser')
path = lodexStamp.uploadedFilename
value = env('source')

#loader json-inspirehep.ini (version juin 2023 - Inist-CNRS)

append = dump
label = json-inspirehep

[use]
plugin = basics
plugin = lodex

[JSONParse]
separator = hits.hits.*

[replace]
path = id
value = get('id')

path = Titre
value = get('metadata.titles[0].title')
#1e occurrence

path = Type de document
value = get('metadata.document_type')

path = Année de publication
value = get('metadata.publication_info[0].year')
#1e occurrence

path = Revue
value = get('metadata.publication_info[0].journal_title')
#1e occurrence

path = Issue
value = get('metadata.publication_info[0].journal_issue')
#1e occurrence

path = N° de volume
value = get('metadata.publication_info[0].journal_volume')
#1e occurrence

path = N° conférence
value = get('metadata.publication_info[0].cnum')
#1e occurrence

path = Auteurs
value = get('metadata.authors').map(author => ({ full_name: author.full_name, affiliations: author.affiliations ? author.affiliations.map(aff => ({ label: aff.value || "", url: ( aff.record ? aff.record["$ref"] : "empty" ) }) ) : [] } ))

path = Catégorie inspire
value = get('metadata.inspire_categories').map(cat => cat.term).uniq()

path = Expériences
value = get('metadata.accelerator_experiments').map(equ => equ.legacy_name).uniq()

path = Collaborations
value = get('metadata.collaborations').map(col => col.value).uniq()

path = Résumé
value = get('metadata.abstracts[0].value')
#1e occurrence

path = DOI
value = get('metadata.dois').map(doi => doi.value).uniq()

[assign]
path = UrlsLabo
value = get('Auteurs').map(author => author.affiliations.filter(aff => aff.url!=="empty").map(aff => aff.url)).flatten().uniq()

[assign]
path = codesLabos
value = get("UrlsLabo").map(url => url.split("/").slice(-1)[0])

[OBJFlatten]
separator = /

[assign]
path = uri
value = get('id')
#constitution identifiant URI LOdex à partir de l'ID

0 comments on commit 08affed

Please sign in to comment.