From 1b5ba6032b3dfadcbde4fd49ab36060d663996a7 Mon Sep 17 00:00:00 2001 From: AnaelKremer <170732172+AnaelKremer@users.noreply.github.com> Date: Thu, 25 Jul 2024 10:04:52 +0200 Subject: [PATCH 1/2] feat: add query conditor for hal cnrs loader --- .../loaders/query-conditor-for-halcnrs.ini | 162 ++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 workers/loaders/query-conditor-for-halcnrs.ini diff --git a/workers/loaders/query-conditor-for-halcnrs.ini b/workers/loaders/query-conditor-for-halcnrs.ini new file mode 100644 index 000000000..8f0d8d55b --- /dev/null +++ b/workers/loaders/query-conditor-for-halcnrs.ini @@ -0,0 +1,162 @@ +append = pack +label = query-conditor +extension = json +mimeType = application/json + +# load some plugins to activate some statements +[use] +plugin = conditor +plugin = basics + +# Toggle ezs traces (see server stderr log) +[debug] +ezs = false + +# {{{ +[TXTConcat] + +[replace] +path = q +value = self().trim() + +[CORHALFetch] +url = https://corhal-api.inist.fr +retries = 3 +timeout = 60000 + +[assign] +path = uri +value = get('business.sourceUidChain') +# }}} + +[assign] +path = typologie +value = get("business.duplicateGenre") + +path = titre +value = get("title.default") + +path = resume +value = get("abstract.default") + +path = auteurs +value = get("authors").map("fullname") + +path = identifiantsAuteurs +value = get("authors").map(author => _.pick(author, ['orcId', 'idRef', 'idHal','viaf'])) + +path = affiliations +value = get("authors").map( author =>_.flatMap(author.affiliations, 'address')) + +path = identifiantsAffiliations +value = get("authors").map("rnsr") + +path = domaine +value = get("classifications") + +path = keywords +value = get("keywords") + +path = ppn +value=get("sourceUids").filter( id => id.startsWith('sudoc')).replace("sudoc-theses$","") + +path = nnt +value = get("fulltextUrl").replace(/^(?!http:\/\/www\.theses\.fr\/).*$/, '').replace("http://www.theses.fr/","").replace("/document","") + +[assign] +path = autresIdentifiants +value = fix({pmcid: self.pmcId,arxiv : self.arxiv,pmid: self.pmId,nnt: self.nnt,ppn : self.ppn}) + +path = funders +value = get("funders").map("fullname") + +path = publicationDate +value = get("host.publicationDate") + +path = electronicPublicationDate +value = get("host.electronicPublicationDate") + +path = volume +value = get("host.volume") + +path = issue +value = get("host.issue") + +path = pages +value = get("host.pages.range") + +path = nomConference +value = get("host.conference.name") + +path = dateDebutConference +value = get("host.conference.date") + +path = villeConference +value = get("host.conference.place") + +path = langueDocument +value = get("host.language") + +path = issn +value = get("host.issn") + +path = nomRevue +value = get("host.title") + +path = publisherRevue +value = get("host.publisher") + +path = isbn +value = get("host.isbn") + +path = editors +value = get("host.editors").map("fullname") + +path = sourceUids +value = get("sourceUids") + +path = halID +value = get("halId") + +path = doublonsHal +value = get("sourceUids").filter(uid => uid.includes('hal')).size().gt(1).replace(true,"Oui").replace(false,"Non") + +[assign] +path=isHal +value=get("business.sourceUidChain").replace(/\$.*?!/g,"!").split("!").compact().some(item=>(/hal/).test(item)).replace(false,"Non").replace(true,"Oui") + +path = fulltextURL +value = get("fulltextUrl") + +# Ensures that each object contains an identification key (required by lodex) +[swing] +test = pick(['URI', 'uri']).pickBy(_.identity).isEmpty() +[swing/identify] + +# Ignore objects with duplicate URI +[dedupe] +ignore = true + +# Prevent keys from containing dot path notation (which is forbidden by nodejs mongoDB driver) +[OBJFlatten] +separator = fix('.') +reverse = true +safe = true + +# Uncomment to see each data sent to the database +#[debug] + +[exchange] +value = omit(["abstract","authors","classifications","business","pii","arxiv","inspire","localRef","pmcId","articleNumber","nnt","ppn","origins","technical","halId","title","originalGenre","pmId","fulltextUrl","enrichments","host"]) + +# Add contextual metadata related to the import +[assign] +path = lodexStamp.importedDate +value = fix(new Date()).thru(d => d.toDateString()) +path = lodexStamp.usedParser +value = env('parser') +path = lodexStamp.uploadedFilename +value = env('source') +path = uri +value = get('uri').trim() + From 2463e69f7ed776654b9bcf01ab5843fe9ce2ca5a Mon Sep 17 00:00:00 2001 From: AnaelKremer <170732172+AnaelKremer@users.noreply.github.com> Date: Fri, 26 Jul 2024 11:36:58 +0200 Subject: [PATCH 2/2] Update query-conditor-for-halcnrs.ini --- workers/loaders/query-conditor-for-halcnrs.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workers/loaders/query-conditor-for-halcnrs.ini b/workers/loaders/query-conditor-for-halcnrs.ini index 8f0d8d55b..273181960 100644 --- a/workers/loaders/query-conditor-for-halcnrs.ini +++ b/workers/loaders/query-conditor-for-halcnrs.ini @@ -1,5 +1,5 @@ append = pack -label = query-conditor +label = query-conditor-for-halcnrs extension = json mimeType = application/json