From 5d2b8aab1dd84cc3ce38ad218e571dc326274017 Mon Sep 17 00:00:00 2001 From: Ewan Cahen Date: Tue, 8 Oct 2024 14:23:28 +0200 Subject: [PATCH 1/4] feat: allow for searching mentions by OpenAlex ID --- .../admin/mentions/MentionsOverview.tsx | 10 ++- .../components/mention/EditMentionModal.tsx | 68 ++++++++-------- .../mention/FindMentionInfoPanel.tsx | 4 +- .../components/mention/FindMentionSection.tsx | 79 ++++++++++++------- .../ImportMentions/apiImportMentions.tsx | 43 +++++----- frontend/components/mention/config.ts | 8 +- .../software/edit/mentions/utils.ts | 13 ++- frontend/utils/editMentions.ts | 34 +++++++- frontend/utils/getOpenalex.ts | 42 +++++++++- 9 files changed, 200 insertions(+), 101 deletions(-) diff --git a/frontend/components/admin/mentions/MentionsOverview.tsx b/frontend/components/admin/mentions/MentionsOverview.tsx index 43055110e..f0e667a7e 100644 --- a/frontend/components/admin/mentions/MentionsOverview.tsx +++ b/frontend/components/admin/mentions/MentionsOverview.tsx @@ -49,10 +49,14 @@ export default function MentionsOverview() { const searchTypeTerm: SearchTermInfo = extractSearchTerm(sanitisedSearch) const termEscaped = encodeURIComponent(sanitisedSearch) - if (searchTypeTerm.type === 'doi') { - return `doi=eq.${termEscaped}` + switch (searchTypeTerm.type) { + case 'doi': + return `doi=eq.${termEscaped}` + case 'openalex': + return `openalex_id=eq.${termEscaped}` + case 'title': + return `or=(title.ilike.*${termEscaped}*,authors.ilike.*${termEscaped}*,journal.ilike.*${termEscaped}*,url.ilike.*${termEscaped}*,note.ilike.*${termEscaped}*,openalex_id.ilike.*${termEscaped}*)` } - return `or=(title.ilike.*${termEscaped}*,authors.ilike.*${termEscaped}*,journal.ilike.*${termEscaped}*,url.ilike.*${termEscaped}*,note.ilike.*${termEscaped}*,openalex_id.ilike.*${termEscaped}*)` } function sanitiseSearch(search: string): string | undefined { diff --git a/frontend/components/mention/EditMentionModal.tsx b/frontend/components/mention/EditMentionModal.tsx index c4d61d493..72b009671 100644 --- a/frontend/components/mention/EditMentionModal.tsx +++ b/frontend/components/mention/EditMentionModal.tsx @@ -65,7 +65,7 @@ export default function EditMentionModal({open, onCancel, onSubmit, item, pos, t const formData = watch() // need to clear image_url error manually after the type change // and dynamic rules change from required to not required - if (formData.mention_type!=='highlight' && errors?.hasOwnProperty('image_url')){ + if (formData.mention_type !== 'highlight' && errors?.hasOwnProperty('image_url')) { clearErrors('image_url') } @@ -130,23 +130,6 @@ export default function EditMentionModal({open, onCancel, onSubmit, item, pos, t width: ['100%'], padding: '1rem 1.5rem' }}> - {isAdmin && - <> - -
- - } {isAdmin && - <> -
- -
- + <> +
+ +
+ +
+ } {!isAdmin && - - The information can not be edited after creation. - } + + The information can not be edited after creation. + } +// SPDX-FileCopyrightText: 2024 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 @@ -12,7 +14,7 @@ export default function FindMentionInfoPanel({children}:{children:any}) { icon={false} > {/* Add existing publication */} - We search in Crossref, DataCite and the RSD. + We search in Crossref, DataCite, OpenAlex and the RSD. All metadata will be imported automatically. { children } diff --git a/frontend/components/mention/FindMentionSection.tsx b/frontend/components/mention/FindMentionSection.tsx index 5f27cad7c..69f814409 100644 --- a/frontend/components/mention/FindMentionSection.tsx +++ b/frontend/components/mention/FindMentionSection.tsx @@ -1,6 +1,6 @@ // SPDX-FileCopyrightText: 2022 - 2023 Dusan Mijatovic (dv4all) -// SPDX-FileCopyrightText: 2022 - 2023 Ewan Cahen (Netherlands eScience Center) // SPDX-FileCopyrightText: 2022 - 2023 dv4all +// SPDX-FileCopyrightText: 2022 - 2024 Ewan Cahen (Netherlands eScience Center) // SPDX-FileCopyrightText: 2022 - 2024 Netherlands eScience Center // SPDX-FileCopyrightText: 2024 Dusan Mijatovic (Netherlands eScience Center) // @@ -8,13 +8,14 @@ import {useAuth} from '~/auth' import {MentionItemProps} from '~/types/Mention' -import {getMentionByDoiFromRsd} from '~/utils/editMentions' +import {getMentionByDoiFromRsd, getMentionByOpenalexIdFromRsd} from '~/utils/editMentions' import {getMentionByDoi} from '~/utils/getDOI' import EditSectionTitle from '~/components/layout/EditSectionTitle' import FindMention from '~/components/mention/FindMention' import FindMentionInfoPanel from '~/components/mention/FindMentionInfoPanel' import useEditMentionReducer from '~/components/mention/useEditMentionReducer' import {extractSearchTerm} from '~/components/software/edit/mentions/utils' +import {getMentionByOpenalexId} from '~/utils/getOpenalex' type FindProjectMentionProps={ id:string, @@ -39,35 +40,57 @@ export default function FindMentionSection({id,config,findPublicationByTitle}:Fi const {session: {token}} = useAuth() const {onAdd} = useEditMentionReducer() - async function findPublication(searchFor: string) { + async function findPublication(searchFor: string): Promise { const searchData = extractSearchTerm(searchFor) - if (searchData.type === 'doi') { - searchFor = searchData.term - // look first at RSD - const rsd = await getMentionByDoiFromRsd({ - doi: searchFor, - token - }) - if (rsd?.status === 200 && rsd.message?.length === 1) { - // return first found item in RSD - const item:MentionItemProps = rsd.message[0] - return [item] + switch (searchData.type) { + case 'doi': { + searchFor = searchData.term + // look first at RSD + const rsd = await getMentionByDoiFromRsd({ + doi: searchFor, + token + }) + if (rsd?.status === 200 && rsd.message?.length === 1) { + // return first found item in RSD + const item: MentionItemProps = rsd.message[0] + return [item] + } + // else find by DOI + const resp = await getMentionByDoi(searchFor) + if (resp?.status === 200) { + return [resp.message as MentionItemProps] + } + return [] } - // else find by DOI - const resp = await getMentionByDoi(searchFor) - if (resp?.status === 200) { - return [resp.message as MentionItemProps] + case 'openalex': { + searchFor = searchData.term + // look first at RSD + const rsd = await getMentionByOpenalexIdFromRsd({ + id: searchFor, + token + }) + if (rsd?.status === 200 && rsd.message?.length === 1) { + // return first found item in RSD + const item: MentionItemProps = rsd.message[0] + return [item] + } + // else find by DOI + const resp = await getMentionByOpenalexId(searchFor) + if (resp?.status === 200) { + return [resp.message as MentionItemProps] + } + return [] + } + case 'title': { + searchFor = searchData.term + // find by title + const mentions = await findPublicationByTitle({ + id: id, + searchFor, + token + }) + return mentions } - return [] - } else{ - searchFor = searchData.term - // find by title - const mentions = await findPublicationByTitle({ - id: id, - searchFor, - token - }) - return mentions } } diff --git a/frontend/components/mention/ImportMentions/apiImportMentions.tsx b/frontend/components/mention/ImportMentions/apiImportMentions.tsx index ca03af9b7..cf3e9b0be 100644 --- a/frontend/components/mention/ImportMentions/apiImportMentions.tsx +++ b/frontend/components/mention/ImportMentions/apiImportMentions.tsx @@ -17,11 +17,11 @@ import useEditMentionReducer from '../useEditMentionReducer' export type DoiBulkImportReport = Map | null -export function useValidateInputList(token:string) { +export function useValidateInputList(token: string) { const {mentions} = useEditMentionReducer() const [validating, setValidating] = useState(false) - async function validateInput(value:string) { + async function validateInput(value: string) { setValidating(true) const doiList = value.split(/\r\n|\n|\r/) const searchResults = await validateInputList(doiList, mentions, token) @@ -48,21 +48,24 @@ export async function validateInputList(doiList: string[], mentions: MentionItem // filter valid DOI type entries .filter(search => { // debugger - if (search.type === 'doi') { - // convert to lower case - const doi = search.term.toLowerCase() - // validate if not already included - const found = mentions.find(mention => mention.doi?.toLowerCase() === doi) - if (found) { - // flag item with DOI already processed - mentionResultPerDoi.set(doi, {doi ,status: 'alreadyImported', include: false}) - return false + switch (search.type) { + case 'doi': { + // convert to lower case + const doi = search.term.toLowerCase() + // validate if not already included + const found = mentions.find(mention => mention.doi?.toLowerCase() === doi) + if (found) { + // flag item with DOI already processed + mentionResultPerDoi.set(doi, {doi, status: 'alreadyImported', include: false}) + return false + } + return true } - return true - } else { - // flag invalid DOI entries - mentionResultPerDoi.set(search.term, {doi:search.term, status: 'invalidDoi', include: false}) - return false + case 'openalex': + case 'title': + // flag invalid DOI entries + mentionResultPerDoi.set(search.term, {doi: search.term, status: 'invalidDoi', include: false}) + return false } }) // extract DOI string from serch info @@ -164,7 +167,7 @@ export async function validateInputList(doiList: string[], mentions: MentionItem // flag dois that are not updated doisNotInDatabase.forEach(doi => { if (!mentionResultPerDoi.has(doi)) { - mentionResultPerDoi.set(doi, {doi,status: 'unknown', include: false}) + mentionResultPerDoi.set(doi, {doi, status: 'unknown', include: false}) } }) } @@ -172,8 +175,8 @@ export async function validateInputList(doiList: string[], mentions: MentionItem return mentionResultPerDoi } -export async function linkMentionToEntity({ids, table, entityName,entityId, token}: { - ids: string[], table: string, entityName: string, entityId:string, token: string +export async function linkMentionToEntity({ids, table, entityName, entityId, token}: { + ids: string[], table: string, entityName: string, entityId: string, token: string }) { try { const url = `/api/v1/${table}` @@ -207,7 +210,7 @@ export async function addMentions({mentions, token}: { mentions: MentionItemProp body: JSON.stringify(mentions) }) if (resp.status === 201) { - const json:MentionItemProps[] = await resp.json() + const json: MentionItemProps[] = await resp.json() return { status: 200, message: json diff --git a/frontend/components/mention/config.ts b/frontend/components/mention/config.ts index 514c9cf45..89880b692 100644 --- a/frontend/components/mention/config.ts +++ b/frontend/components/mention/config.ts @@ -6,13 +6,13 @@ // SPDX-License-Identifier: Apache-2.0 import {MentionByType, MentionTypeKeys} from '~/types/Mention' -import {doiRegexStrict} from '~/components/software/edit/mentions/utils' +import {DOI_REGEX_STRICT} from '~/components/software/edit/mentions/utils' export const findMention={ // title: 'Add publication', // subtitle: 'We search in Crossref, DataCite and RSD databases', - label: 'Search by DOI or publication title', - help: 'Valid DOI or at least first 2 letters of publication title', + label: 'Search by DOI, OpenAlex ID or publication title', + help: 'Valid DOI, OpenAlex ID or at least first 2 letters of publication title', validation: { // custom validation rule, not in use by react-hook-form minLength: 2, @@ -31,7 +31,7 @@ export const mentionModal = { message: 'Maximum length is 255' }, pattern: { - value: doiRegexStrict, + value: DOI_REGEX_STRICT, message: 'The DOI should look like 10.XXX/XXX' } } diff --git a/frontend/components/software/edit/mentions/utils.ts b/frontend/components/software/edit/mentions/utils.ts index 629e8e48a..065fd07ae 100644 --- a/frontend/components/software/edit/mentions/utils.ts +++ b/frontend/components/software/edit/mentions/utils.ts @@ -6,19 +6,24 @@ // // SPDX-License-Identifier: Apache-2.0 -const doiRegex = /10(\.\w+)+\/\S+/ -export const doiRegexStrict = /^10(\.\w+)+\/\S+$/ +const DOI_REGEX = /10(\.\w+)+\/\S+/ +export const DOI_REGEX_STRICT = /^10(\.\w+)+\/\S+$/ +const OPENALEX_ID_REGEX = /https:\/\/openalex\.org\/([WwAaSsIiCcPpFf]\d{3,13})/ export type SearchTermInfo = { term: string, - type: 'doi' | 'title' + type: 'doi' | 'title' | 'openalex' } export function extractSearchTerm(query: string): SearchTermInfo{ - const doiRegexMatch = query.match(doiRegex) + const doiRegexMatch = DOI_REGEX.exec(query) if (doiRegexMatch != null) { return {term: doiRegexMatch[0], type: 'doi'} } + const openalexRegexMatch = OPENALEX_ID_REGEX.exec(query) + if (openalexRegexMatch != null) { + return {term: openalexRegexMatch[0], type: 'openalex'} + } // remove double spaces: query = query.trim() query = query.replaceAll(/\s+/g, ' ') diff --git a/frontend/utils/editMentions.ts b/frontend/utils/editMentions.ts index 4590ff08c..acb98d3fc 100644 --- a/frontend/utils/editMentions.ts +++ b/frontend/utils/editMentions.ts @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2022 - 2023 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2022 - 2024 Ewan Cahen (Netherlands eScience Center) // SPDX-FileCopyrightText: 2022 - 2024 Netherlands eScience Center // SPDX-FileCopyrightText: 2022 Dusan Mijatovic (dv4all) // SPDX-FileCopyrightText: 2022 dv4all @@ -65,7 +65,35 @@ export async function getMentionByDoiFromRsd({doi,token}:{doi: string, token: st } return extractReturnMessage(resp) } catch (e:any) { - logger(`getDoiFromRsd: ${e?.message}`, 'error') + logger(`getMentionByDoiFromRsd: ${e?.message}`, 'error') + return { + status: 500, + message: e?.message + } + } +} + + +export async function getMentionByOpenalexIdFromRsd({id,token}:{id: string, token: string}) { + try { + // we need to encode the OpenAlex ID because it is a full URL + const url = `/api/v1/mention?select=${mentionColumns}&openalex_id=eq.${encodeURIComponent(id)}` + const resp = await fetch(url, { + method: 'GET', + headers: { + ...createJsonHeaders(token) + }, + }) + if (resp.status === 200) { + const json = await resp.json() + return { + status: 200, + message: json + } + } + return extractReturnMessage(resp) + } catch (e:any) { + logger(`getMentionByOpenalexIdFromRsd: ${e?.message}`, 'error') return { status: 500, message: e?.message @@ -264,7 +292,7 @@ export async function updateMentionItem({mention, token}: body: JSON.stringify(mention) }) - if ([200,204].includes(resp.status)===true) { + if ([200,204].includes(resp.status)) { // return item in message return { status: 200, diff --git a/frontend/utils/getOpenalex.ts b/frontend/utils/getOpenalex.ts index 3c03fc79f..4de4d87e8 100644 --- a/frontend/utils/getOpenalex.ts +++ b/frontend/utils/getOpenalex.ts @@ -7,6 +7,41 @@ import logger from '~/utils/logger' import {MentionItemProps} from '~/types/Mention' import {crossrefToRsdType} from '~/utils/getCrossref' +export async function getMentionByOpenalexId(id: string) { + try { + const url = `https://api.openalex.org/${encodeURI(id)}` + + const resp = await fetch(url) + + if (resp.status === 200) { + const json = await resp.json() + const mention = openalexItemToMentionItem(json) + return ({ + status: 200, + message: mention + }) + } + else if (resp.status === 404) { + return { + status: 404, + message: 'DOI not found' + } + } + else { + return ({ + status: resp.status, + message: 'unexpected response from OpenAlex' + }) + } + } catch (e:any) { + logger(`getMentionByOpenalexId: ${e?.message}`, 'error') + return { + status: 500, + message: e?.message + } + } +} + export async function getOpenalexItemByDoi(doi: string) { try { const url = `https://api.openalex.org/works/https://doi.org/${doi}` @@ -70,10 +105,13 @@ export async function getOpenalexItemsByDoi(dois: string[]) { } export function openalexItemToMentionItem(json: any): MentionItemProps { + const doiUrl = json.doi + const doi = doiUrl === null ? null : doiUrl.substring('https://doi.org/'.length) return ({ id: null, - doi: json.doi.substring('https://doi.org/'.length), - url: json.doi, + doi: doi, + openalex_id: json.id, + url: doiUrl, title: json.title, authors: extractAuthors(json), publisher: null, From d800792e6db641cbec6861fb6d3cb9ca6c3799bf Mon Sep 17 00:00:00 2001 From: Ewan Cahen Date: Tue, 8 Oct 2024 16:28:31 +0200 Subject: [PATCH 2/4] feat: mention title search now also searches the OpenAlex database --- .../impact/impactForProjectApi.test.ts | 3 +- .../mentions/impact/impactForProjectApi.ts | 5 +- .../output/outputForProjectApi.test.ts | 3 +- .../mentions/output/outputForProjectApi.ts | 5 +- .../mentions/output/apiRelatedOutput.test.ts | 3 +- .../edit/mentions/output/apiRelatedOutput.ts | 5 +- .../mention/{impact.ts => find_by_title.ts} | 96 ++++++++---- frontend/pages/api/fe/mention/output.ts | 137 ------------------ frontend/pages/api/fe/mention/software.ts | 134 ----------------- frontend/utils/getOpenalex.ts | 40 +++++ 10 files changed, 122 insertions(+), 309 deletions(-) rename frontend/pages/api/fe/mention/{impact.ts => find_by_title.ts} (55%) delete mode 100644 frontend/pages/api/fe/mention/output.ts delete mode 100644 frontend/pages/api/fe/mention/software.ts diff --git a/frontend/components/projects/edit/mentions/impact/impactForProjectApi.test.ts b/frontend/components/projects/edit/mentions/impact/impactForProjectApi.test.ts index b2152045b..0fb4c7247 100644 --- a/frontend/components/projects/edit/mentions/impact/impactForProjectApi.test.ts +++ b/frontend/components/projects/edit/mentions/impact/impactForProjectApi.test.ts @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: 2022 Dusan Mijatovic (dv4all) (dv4all) // SPDX-FileCopyrightText: 2022 dv4all // SPDX-FileCopyrightText: 2024 Dusan Mijatovic (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) // SPDX-FileCopyrightText: 2024 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 @@ -34,7 +35,7 @@ it('findPublicationByTitle', async () => { token: 'TEST-TOKEN' } - const expectedUrl = `/api/fe/mention/impact?id=${props.id}&search=${encodeURIComponent(props.searchFor)}` + const expectedUrl = `/api/fe/mention/find_by_title?id=${props.id}&search=${encodeURIComponent(props.searchFor)}&relation_type=impact` const expectBody = { 'headers': { 'Authorization': `Bearer ${props.token}`, diff --git a/frontend/components/projects/edit/mentions/impact/impactForProjectApi.ts b/frontend/components/projects/edit/mentions/impact/impactForProjectApi.ts index a20df87dc..5a250d1ac 100644 --- a/frontend/components/projects/edit/mentions/impact/impactForProjectApi.ts +++ b/frontend/components/projects/edit/mentions/impact/impactForProjectApi.ts @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: 2022 Dusan Mijatovic (dv4all) // SPDX-FileCopyrightText: 2022 dv4all // SPDX-FileCopyrightText: 2024 Dusan Mijatovic (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) // SPDX-FileCopyrightText: 2024 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 @@ -12,8 +13,8 @@ import {addOrGetMentionItem} from '~/utils/editMentions' export async function findPublicationByTitle({id, searchFor, token}: { id: string, searchFor: string, token: string }) { - const query = `id=${id}&search=${encodeURIComponent(searchFor)}` - const url = `/api/fe/mention/impact?${query}` + const query = `id=${id}&search=${encodeURIComponent(searchFor)}&relation_type=impact` + const url = `/api/fe/mention/find_by_title?${query}` try { const resp = await fetch(url, { method: 'GET', diff --git a/frontend/components/projects/edit/mentions/output/outputForProjectApi.test.ts b/frontend/components/projects/edit/mentions/output/outputForProjectApi.test.ts index 8db50b468..95b53dd91 100644 --- a/frontend/components/projects/edit/mentions/output/outputForProjectApi.test.ts +++ b/frontend/components/projects/edit/mentions/output/outputForProjectApi.test.ts @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: 2023 Dusan Mijatovic (dv4all) (dv4all) // SPDX-FileCopyrightText: 2023 dv4all // SPDX-FileCopyrightText: 2024 Dusan Mijatovic (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) // SPDX-FileCopyrightText: 2024 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 @@ -28,7 +29,7 @@ it('findPublicationByTitle', async () => { token: 'TEST-TOKEN' } - const expectedUrl = `/api/fe/mention/output?id=${props.id}&search=${encodeURIComponent(props.searchFor)}` + const expectedUrl = `/api/fe/mention/find_by_title?id=${props.id}&search=${encodeURIComponent(props.searchFor)}&relation_type=output` const expectBody = { 'headers': { 'Authorization': `Bearer ${props.token}`, diff --git a/frontend/components/projects/edit/mentions/output/outputForProjectApi.ts b/frontend/components/projects/edit/mentions/output/outputForProjectApi.ts index f5f680c1e..0c0912e3b 100644 --- a/frontend/components/projects/edit/mentions/output/outputForProjectApi.ts +++ b/frontend/components/projects/edit/mentions/output/outputForProjectApi.ts @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: 2022 Dusan Mijatovic (dv4all) // SPDX-FileCopyrightText: 2022 dv4all // SPDX-FileCopyrightText: 2024 Dusan Mijatovic (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) // SPDX-FileCopyrightText: 2024 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 @@ -12,8 +13,8 @@ import {addOrGetMentionItem} from '~/utils/editMentions' export async function findPublicationByTitle({id, searchFor, token}: { id: string, searchFor: string, token: string }) { - const query = `id=${id}&search=${encodeURIComponent(searchFor)}` - const url = `/api/fe/mention/output?${query}` + const query = `id=${id}&search=${encodeURIComponent(searchFor)}&relation_type=output` + const url = `/api/fe/mention/find_by_title?${query}` try { const resp = await fetch(url, { method: 'GET', diff --git a/frontend/components/software/edit/mentions/output/apiRelatedOutput.test.ts b/frontend/components/software/edit/mentions/output/apiRelatedOutput.test.ts index e649e14e3..e0ffd2e40 100644 --- a/frontend/components/software/edit/mentions/output/apiRelatedOutput.test.ts +++ b/frontend/components/software/edit/mentions/output/apiRelatedOutput.test.ts @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: 2023 Dusan Mijatovic (dv4all) (dv4all) // SPDX-FileCopyrightText: 2023 dv4all // SPDX-FileCopyrightText: 2024 Dusan Mijatovic (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) // SPDX-FileCopyrightText: 2024 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 @@ -26,7 +27,7 @@ it('findPublicationByTitle', async () => { token: 'TEST-TOKEN' } - const expectedUrl = `/api/fe/mention/software?id=${props.id}&search=${encodeURIComponent(props.searchFor)}` + const expectedUrl = `/api/fe/mention/find_by_title?id=${props.id}&search=${encodeURIComponent(props.searchFor)}&relation_type=software` const expectBody = { 'headers': { 'Authorization': `Bearer ${props.token}`, diff --git a/frontend/components/software/edit/mentions/output/apiRelatedOutput.ts b/frontend/components/software/edit/mentions/output/apiRelatedOutput.ts index 5be78bb6b..ec6046716 100644 --- a/frontend/components/software/edit/mentions/output/apiRelatedOutput.ts +++ b/frontend/components/software/edit/mentions/output/apiRelatedOutput.ts @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: 2022 Dusan Mijatovic (dv4all) // SPDX-FileCopyrightText: 2022 dv4all // SPDX-FileCopyrightText: 2024 Dusan Mijatovic (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) // SPDX-FileCopyrightText: 2024 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 @@ -18,8 +19,8 @@ import {addOrGetMentionItem} from '~/utils/editMentions' */ export async function findPublicationByTitle({id, searchFor, token}: { id: string, searchFor: string, token: string }) { - const query = `id=${id}&search=${encodeURIComponent(searchFor)}` - const url = `/api/fe/mention/software?${query}` + const query = `id=${id}&search=${encodeURIComponent(searchFor)}&relation_type=software` + const url = `/api/fe/mention/find_by_title?${query}` try { const resp = await fetch(url, { method: 'GET', diff --git a/frontend/pages/api/fe/mention/impact.ts b/frontend/pages/api/fe/mention/find_by_title.ts similarity index 55% rename from frontend/pages/api/fe/mention/impact.ts rename to frontend/pages/api/fe/mention/find_by_title.ts index d82a7abc6..1b9b467c2 100644 --- a/frontend/pages/api/fe/mention/impact.ts +++ b/frontend/pages/api/fe/mention/find_by_title.ts @@ -1,32 +1,46 @@ -// SPDX-FileCopyrightText: 2022 Dusan Mijatovic (dv4all) -// SPDX-FileCopyrightText: 2022 dv4all +// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2024 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 -// Next.js API route support: https://nextjs.org/docs/api-routes/introduction import type {NextApiRequest, NextApiResponse} from 'next' -import {getSessionSeverSide} from '~/auth' -import {CrossrefSelectItem} from '~/types/Crossref' -import {WorkResponse} from '~/types/Datacite' import {MentionItemProps} from '~/types/Mention' -import {extractParam,Error} from '~/utils/apiHelpers' +import {Error, extractParam} from '~/utils/apiHelpers' +import {getSessionSeverSide} from '~/auth' +import logger from '~/utils/logger' import {createJsonHeaders, getBaseUrl, promiseWithTimeout} from '~/utils/fetchHelpers' import {crossrefItemToMentionItem, getCrossrefItemsByTitle} from '~/utils/getCrossref' import {dataCiteGraphQLItemToMentionItem, getDataciteItemsByTitleGraphQL} from '~/utils/getDataCite' +import {CrossrefSelectItem} from '~/types/Crossref' +import {WorkResponse} from '~/types/Datacite' import {itemsNotInReferenceList} from '~/utils/itemsNotInReferenceList' -import logger from '~/utils/logger' import {sortBySearchFor} from '~/utils/sortFn' +import {getOpenalexMentionsByTitle} from '~/utils/getOpenalex' + +const crossrefTimeoutSec = 30 + +type RelationType = 'software' | 'impact' | 'output' -export const crossrefTimeoutSec = 30 +function getUrlForRelationType(relationType: RelationType): string { + const baseUrl = getBaseUrl() + switch (relationType) { + case 'software': + return `${baseUrl}/rpc/search_mentions_for_software` + case 'impact': + return `${baseUrl}/rpc/search_impact_for_project` + case 'output': + return `${baseUrl}/rpc/search_output_for_project` + } +} /** * Searching for items in mention table which are NOT assigned to impact of the project already. * @returns MentionItem[] */ -export async function searchForAvailableImpact({project, searchFor, token}: - { project: string, searchFor: string, token: string }) { - const baseUrl = getBaseUrl() - const url = `${baseUrl}/rpc/search_impact_for_project` +export async function searchForAvailableMentions({project, searchFor, token, relationType}: + { project: string, searchFor: string, token: string, relationType: RelationType}) { + + const url = getUrlForRelationType(relationType) try { const resp = await fetch(url, { method: 'POST', @@ -41,32 +55,33 @@ export async function searchForAvailableImpact({project, searchFor, token}: const json: MentionItemProps[] = await resp.json() return json } - logger(`searchForAvailableImpact: 404 [${url}]`, 'error') + logger(`searchForAvailableMentions: 404 [${url}]`, 'error') return [] } catch (e: any) { - logger(`searchForAvailableImpact: ${e?.message}`, 'error') + logger(`searchForAvailableMentions: ${e?.message}`, 'error') return [] } } -export async function findPublicationByTitle({project, searchFor, token}: - { project: string, searchFor: string, token: string }) { - const promises = [ +export async function findPublicationByTitle({project, searchFor, token, relationType}: { project: string, searchFor: string, token: string, relationType: RelationType }) { + const promises: Promise[] = [ promiseWithTimeout(getCrossrefItemsByTitle(searchFor), crossrefTimeoutSec), getDataciteItemsByTitleGraphQL(searchFor), - searchForAvailableImpact({ + getOpenalexMentionsByTitle(searchFor), + searchForAvailableMentions({ project, searchFor, - token + token, + relationType }) ] // make requests - const [crossref, datacite, rsd] = await Promise.allSettled(promises) + const [crossref, datacite, openalex, rsd] = await Promise.allSettled(promises) // convert crossref responses to MentionItems let crosrefItems: MentionItemProps[] = [] if (crossref.status === 'fulfilled') { - crosrefItems = crossref?.value.map(item => { - return crossrefItemToMentionItem(item as CrossrefSelectItem) + crosrefItems = crossref?.value.map((item: CrossrefSelectItem) => { + return crossrefItemToMentionItem(item) }) } else { logger(`impact.findPublicationByTitle: Crossref request timeout after ${crossrefTimeoutSec}sec.`, 'warn') @@ -74,18 +89,28 @@ export async function findPublicationByTitle({project, searchFor, token}: // convert datacite responses to MentionItems let dataciteItems: MentionItemProps[] = [] if (datacite.status === 'fulfilled') { - dataciteItems = datacite?.value.map(item => { - return dataCiteGraphQLItemToMentionItem(item as WorkResponse) + dataciteItems = datacite?.value.map((item: WorkResponse) => { + return dataCiteGraphQLItemToMentionItem(item) }) } else { logger(`impact.findPublicationByTitle: Datacite request failed ${datacite.reason}`, 'warn') } + + let openalexMentions: MentionItemProps[] = [] + if (openalex.status === 'fulfilled') { + if (openalex.value.status === 200) { + openalexMentions = openalex.value.result as MentionItemProps[] + } + } else { + logger(`find_by_title.findPublicationByTitle: OpenAlex request failed ${openalex.reason}`, 'warn') + } + // change items source to RSD for ones pulled from RSD let rsdItems: MentionItemProps[] = [] if (rsd.status === 'fulfilled') { rsdItems = rsd.value as MentionItemProps[] } else { - logger(`impact.findPublicationByTitle: RSD request failed ${rsd.reason}`, 'warn') + logger(`find_by_title.findPublicationByTitle: RSD request failed ${rsd.reason}`, 'warn') } // return results const sorted = [ @@ -102,12 +127,17 @@ export async function findPublicationByTitle({project, searchFor, token}: list: dataciteItems, referenceList: rsdItems, key: 'doi' + }), + // OpenAlex items not existing in RSD + ...itemsNotInReferenceList( { + list: openalexMentions, + referenceList: rsdItems, + key: 'doi' }) ].sort((a, b) => sortBySearchFor(a, b, 'title', searchFor)) return sorted } - export default async function handler( req: NextApiRequest, res: NextApiResponse @@ -116,6 +146,13 @@ export default async function handler( // extract query parameters const project = extractParam(req, 'id') const searchFor = extractParam(req, 'search') + const relationType = extractParam(req, 'relation_type') + if (!['software', 'impact', 'output'].includes(relationType)) { + return res.status(400).json({ + message: 'Please provide a valid relation_type (software, impact or output)' + }) + } + const relationTypeChecked = relationType as RelationType const session = getSessionSeverSide(req, res) if (session?.status !== 'authenticated') { return res.status(401).json({ @@ -126,13 +163,14 @@ export default async function handler( const mentions = await findPublicationByTitle({ project, searchFor, - token:session.token + token:session.token, + relationType: relationTypeChecked }) res.status(200).json(mentions) } catch (e: any) { - logger(`api.impact: ${e.message}`, 'error') + logger(`api.find_by_title: ${e.message}`, 'error') res.status(500).json({message: e.message}) } } diff --git a/frontend/pages/api/fe/mention/output.ts b/frontend/pages/api/fe/mention/output.ts deleted file mode 100644 index d5bcbc1b0..000000000 --- a/frontend/pages/api/fe/mention/output.ts +++ /dev/null @@ -1,137 +0,0 @@ -// SPDX-FileCopyrightText: 2022 Dusan Mijatovic (dv4all) -// SPDX-FileCopyrightText: 2022 dv4all -// -// SPDX-License-Identifier: Apache-2.0 - -// Next.js API route support: https://nextjs.org/docs/api-routes/introduction -import type {NextApiRequest, NextApiResponse} from 'next' -import {getSessionSeverSide} from '~/auth' -import {CrossrefSelectItem} from '~/types/Crossref' -import {WorkResponse} from '~/types/Datacite' -import {MentionItemProps} from '~/types/Mention' -import {extractParam,Error} from '~/utils/apiHelpers' -import {createJsonHeaders, getBaseUrl, promiseWithTimeout} from '~/utils/fetchHelpers' -import {crossrefItemToMentionItem, getCrossrefItemsByTitle} from '~/utils/getCrossref' -import {dataCiteGraphQLItemToMentionItem, getDataciteItemsByTitleGraphQL} from '~/utils/getDataCite' -import {itemsNotInReferenceList} from '~/utils/itemsNotInReferenceList' -import logger from '~/utils/logger' -import {sortBySearchFor} from '~/utils/sortFn' -import {crossrefTimeoutSec} from './impact' - -/** - * Searching for items in mention table which are NOT assigned to impact of the project already. - * @returns MentionItem[] - */ -export async function searchForAvailableOutput({project, searchFor, token}: - { project: string, searchFor: string, token: string }) { - const baseUrl = getBaseUrl() - const url = `${baseUrl}/rpc/search_output_for_project` - try { - const resp = await fetch(url, { - method: 'POST', - headers: createJsonHeaders(token), - body: JSON.stringify({ - project_id: project, - search_text: searchFor - }) - }) - // debugger - if (resp.status === 200) { - const json: MentionItemProps[] = await resp.json() - return json - } - logger(`searchForAvailableOutput: 404 [${url}]`, 'error') - return [] - } catch (e: any) { - logger(`searchForAvailableOutput: ${e?.message}`, 'error') - return [] - } -} - -export async function findPublicationByTitle({project, searchFor, token}: - { project: string, searchFor: string, token: string }) { - const promises = [ - promiseWithTimeout(getCrossrefItemsByTitle(searchFor), crossrefTimeoutSec), - getDataciteItemsByTitleGraphQL(searchFor), - searchForAvailableOutput({ - project, - searchFor, - token - }) - ] - // make requests - const [crossref, datacite, rsd] = await Promise.allSettled(promises) - // convert crossref responses to MentionItems - let crosrefItems: MentionItemProps[] = [] - if (crossref.status === 'fulfilled') { - crosrefItems = crossref?.value.map(item => { - return crossrefItemToMentionItem(item as CrossrefSelectItem) - }) - } else { - logger(`output.findPublicationByTitle: Crossref request timeout after ${crossrefTimeoutSec}sec.`, 'warn') - } - // convert datacite responses to MentionItems - let dataciteItems: MentionItemProps[] = [] - if (datacite.status === 'fulfilled') { - dataciteItems = datacite?.value.map(item => { - return dataCiteGraphQLItemToMentionItem(item as WorkResponse) - }) - } else { - logger(`output.findPublicationByTitle: Datacite request failed ${datacite.reason}`, 'warn') - } - // change items source to RSD for ones pulled from RSD - let rsdItems: MentionItemProps[] = [] - if (rsd.status === 'fulfilled') { - rsdItems = rsd.value as MentionItemProps[] - } else { - logger(`output.findPublicationByTitle: RSD request failed ${rsd.reason}`, 'warn') - } - // return results - const sorted = [ - // RSD items at the top - ...rsdItems, - // Crossref items not existing in RSD - ...itemsNotInReferenceList({ - list: crosrefItems, - referenceList: rsdItems, - key: 'doi' - }), - // Datacite items not existing in RSD - ...itemsNotInReferenceList({ - list: dataciteItems, - referenceList: rsdItems, - key: 'doi' - }) - ].sort((a, b) => sortBySearchFor(a, b, 'title', searchFor)) - return sorted -} - - -export default async function handler( - req: NextApiRequest, - res: NextApiResponse -) { - try { - // extract query parameters - const project = extractParam(req, 'id') - const searchFor = extractParam(req, 'search') - const session = getSessionSeverSide(req, res) - if (session?.status !== 'authenticated') { - return res.status(401).json({ - message: '401 Unauthorized' - }) - } - - const mentions = await findPublicationByTitle({ - project, - searchFor, - token:session.token - }) - - res.status(200).json(mentions) - - } catch (e: any) { - logger(`api.impact: ${e.message}`, 'error') - res.status(500).json({message: e.message}) - } -} diff --git a/frontend/pages/api/fe/mention/software.ts b/frontend/pages/api/fe/mention/software.ts deleted file mode 100644 index 4828a7016..000000000 --- a/frontend/pages/api/fe/mention/software.ts +++ /dev/null @@ -1,134 +0,0 @@ -// SPDX-FileCopyrightText: 2022 Dusan Mijatovic (dv4all) -// SPDX-FileCopyrightText: 2022 dv4all -// -// SPDX-License-Identifier: Apache-2.0 - -// Next.js API route support: https://nextjs.org/docs/api-routes/introduction -import type {NextApiRequest, NextApiResponse} from 'next' -import {getSessionSeverSide} from '~/auth' -import {CrossrefSelectItem} from '~/types/Crossref' -import {WorkResponse} from '~/types/Datacite' -import {MentionItemProps} from '~/types/Mention' -import {extractParam,Error} from '~/utils/apiHelpers' -import {createJsonHeaders, getBaseUrl, promiseWithTimeout} from '~/utils/fetchHelpers' -import {crossrefItemToMentionItem, getCrossrefItemsByTitle} from '~/utils/getCrossref' -import {dataCiteGraphQLItemToMentionItem, getDataciteItemsByTitleGraphQL} from '~/utils/getDataCite' -import {itemsNotInReferenceList} from '~/utils/itemsNotInReferenceList' -import logger from '~/utils/logger' -import {sortBySearchFor} from '~/utils/sortFn' -import {crossrefTimeoutSec} from './impact' - -/** - * Searching for items in mention table which are NOT assigned to impact of the project already. - * @returns MentionItem[] - */ -export async function searchForAvailableMentions({software, searchFor, token}: - { software: string, searchFor: string, token: string }) { - const limit = 10 - const baseUrl = getBaseUrl() - const url = `${baseUrl}/rpc/search_mentions_for_software?software_id=${software}&search_text=${searchFor}&limit=${limit}` - try { - const resp = await fetch(url, { - method: 'GET', - headers: createJsonHeaders(token) - }) - // debugger - if (resp.status === 200) { - const json: MentionItemProps[] = await resp.json() - return json - } - logger(`searchForAvailableMentions: ${resp.status} ${resp.statusText} [${url}]`, 'error') - return [] - } catch (e: any) { - logger(`searchForAvailableMentions: ${e?.message}`, 'error') - return [] - } -} - -export async function findPublicationByTitle({software, searchFor, token}: - { software: string, searchFor: string, token: string }) { - const promises = [ - promiseWithTimeout(getCrossrefItemsByTitle(searchFor), crossrefTimeoutSec), - getDataciteItemsByTitleGraphQL(searchFor), - searchForAvailableMentions({ - software, - searchFor, - token - }) - ] - // make requests - const [crossref, datacite, rsd] = await Promise.allSettled(promises) - // convert crossref responses to MentionItems - let crosrefItems: MentionItemProps[] = [] - if (crossref.status === 'fulfilled') { - crosrefItems = crossref?.value.map(item => { - return crossrefItemToMentionItem(item as CrossrefSelectItem) - }) - } else { - logger(`software.findPublicationByTitle: Crossref request timeout after ${crossrefTimeoutSec}sec.`, 'warn') - } - // convert datacite responses to MentionItems - let dataciteItems: MentionItemProps[] = [] - if (datacite.status === 'fulfilled') { - dataciteItems = datacite?.value.map(item => { - return dataCiteGraphQLItemToMentionItem(item as WorkResponse) - }) - } else { - logger(`software.findPublicationByTitle: Datacite request failed ${datacite.reason}`, 'warn') - } - // change items source to RSD for ones pulled from RSD - let rsdItems: MentionItemProps[] = [] - if (rsd.status === 'fulfilled') { - rsdItems = rsd.value as MentionItemProps[] - } else { - logger(`software.findPublicationByTitle: RSD request failed ${rsd.reason}`, 'warn') - } - // return results - const sorted = [ - // RSD items at the top - ...rsdItems, - // Crossref items not existing in RSD - ...itemsNotInReferenceList({ - list: crosrefItems, - referenceList: rsdItems, - key: 'doi' - }), - // Datacite items not existing in RSD - ...itemsNotInReferenceList({ - list: dataciteItems, - referenceList: rsdItems, - key: 'doi' - }) - ].sort((a, b) => sortBySearchFor(a, b, 'title', searchFor)) - return sorted -} - - -export default async function handler( - req: NextApiRequest, - res: NextApiResponse -) { - try { - // extract query parameters - const software = extractParam(req, 'id') - const searchFor = extractParam(req, 'search') - const session = getSessionSeverSide(req, res) - if (session?.status !== 'authenticated') { - return res.status(401).json({ - message: '401 Unauthorized' - }) - } - - const mentions = await findPublicationByTitle({ - software, - searchFor, - token:session.token - }) - - res.status(200).json(mentions) - - } catch (e: any) { - logger(`api.software: ${e.message}`, 'error') - res.status(500).json({message: e.message}) - } -} diff --git a/frontend/utils/getOpenalex.ts b/frontend/utils/getOpenalex.ts index 4de4d87e8..9a32a7be4 100644 --- a/frontend/utils/getOpenalex.ts +++ b/frontend/utils/getOpenalex.ts @@ -42,6 +42,46 @@ export async function getMentionByOpenalexId(id: string) { } } +export async function getOpenalexMentionsByTitle(search: string): Promise<{status: number, message: string, result: MentionItemProps[]}> { + try { + const url = `https://api.openalex.org/works?search=${encodeURI(search)}` + + const resp = await fetch(url) + + if (resp.status === 200) { + const json = await resp.json() + const mentions: MentionItemProps[] = json.results.map((result: any) => openalexItemToMentionItem(result)) + return ({ + status: 200, + message: 'success', + result: mentions + }) + } + else if (resp.status === 404) { + return { + status: 404, + message: 'not found', + result: [] + } + } + else { + return ({ + status: resp.status, + message: `unexpected response from OpenAlex: ${await resp.text()}`, + result: [] + }) + } + } catch (e:any) { + logger(`getOpenalexMentionsByTitle: ${e?.message}`, 'error') + return { + status: 500, + message: e?.message, + result: [] + } + } +} + + export async function getOpenalexItemByDoi(doi: string) { try { const url = `https://api.openalex.org/works/https://doi.org/${doi}` From 67566fc244d9fd5824077799a894071c91e20c9d Mon Sep 17 00:00:00 2001 From: Ewan Cahen Date: Wed, 9 Oct 2024 20:22:38 +0200 Subject: [PATCH 3/4] feat: add scraper for mentions with an OpenAlex ID --- .../rsd/scraper/doi/MainCitations.java | 4 +- .../rsd/scraper/doi/MainMentions.java | 66 +++++++++++++------ ...xCitations.java => OpenAlexConnector.java} | 44 +++++++++++-- .../rsd/scraper/doi/OpenalexId.java | 4 ++ .../doi/PostgrestMentionRepository.java | 2 +- .../scraper/doi/OpenAlexCitationsTest.java | 2 +- 6 files changed, 95 insertions(+), 27 deletions(-) rename scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/{OpenAlexCitations.java => OpenAlexConnector.java} (82%) diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainCitations.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainCitations.java index 050d9069d..da9559c11 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainCitations.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainCitations.java @@ -36,7 +36,7 @@ public static void main(String[] args) { PostgrestCitationRepository localCitationRepository = new PostgrestCitationRepository(backendUrl); Collection referencePapersToScrape = localCitationRepository.leastRecentlyScrapedCitations(5); - OpenAlexCitations openAlexCitations = new OpenAlexCitations(); + OpenAlexConnector openAlexConnector = new OpenAlexConnector(); PostgrestMentionRepository localMentionRepository = new PostgrestMentionRepository(backendUrl); String email = Config.crossrefContactEmail().orElse(null); Instant now = Instant.now(); @@ -47,7 +47,7 @@ public static void main(String[] args) { LOGGER.info("Scraping for DOI {}, OpenAlex ID {}", citationData.doi(), citationData.openalexId()); - Collection citingMentions = openAlexCitations.citations(citationData.openalexId(), citationData.doi(), email, citationData.id()); + Collection citingMentions = openAlexConnector.citations(citationData.openalexId(), citationData.doi(), email, citationData.id()); // we don't update mentions that have a DOI in the database with OpenAlex data, as they can already be // scraped through Crossref or DataCite diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainMentions.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainMentions.java index 7b996e0ad..b8fb0f767 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainMentions.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainMentions.java @@ -15,10 +15,13 @@ import org.slf4j.LoggerFactory; import java.time.Instant; +import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; +import java.util.UUID; import java.util.stream.Collectors; public class MainMentions { @@ -36,13 +39,27 @@ public static void main(String[] args) { // we will remove successfully scraped mentions from here, // we use this to set scrapedAt even for failed mentions, // to put them back at the scraping queue - Map mentionsFailedToScrape = new HashMap<>(); + Map mentionsFailedToScrape = new HashMap<>(); + Map doiToId = new HashMap<>(); + Map openalexIdToId = new HashMap<>(); for (RsdMentionIds mentionIds : mentionsToScrape) { - mentionsFailedToScrape.put(mentionIds.doi(), mentionIds); + UUID id = mentionIds.id(); + mentionsFailedToScrape.put(id, mentionIds); + + Doi doi = mentionIds.doi(); + if (doi != null) { + doiToId.put(doi, id); + } + + OpenalexId openalexId = mentionIds.openalexId(); + if (openalexId != null) { + openalexIdToId.put(openalexId, id); + } } String doisJoined = mentionsToScrape.stream() .map(RsdMentionIds::doi) + .filter(Objects::nonNull) .map(Doi::toUrlEncodedString) .collect(Collectors.joining(",")); String jsonSources = null; @@ -72,14 +89,14 @@ public static void main(String[] args) { } for (ExternalMentionRecord scrapedMention : scrapedDataciteMentions) { Doi doi = scrapedMention.doi(); - RsdMentionIds ids = mentionsFailedToScrape.get(doi); + UUID id = doiToId.get(doi); try { - RsdMentionRecord mentionToUpdate = new RsdMentionRecord(ids.id(), scrapedMention, now); + RsdMentionRecord mentionToUpdate = new RsdMentionRecord(id, scrapedMention, now); localMentionRepository.updateMention(mentionToUpdate, false); - mentionsFailedToScrape.remove(doi); + mentionsFailedToScrape.remove(id); } catch (Exception e) { LOGGER.error("Failed to update a DataCite mention with DOI {}", scrapedMention.doi()); - Utils.saveExceptionInDatabase("Mention scraper", "mention", ids.id(), e); + Utils.saveExceptionInDatabase("Mention scraper", "mention", id, e); } } @@ -94,30 +111,30 @@ public static void main(String[] args) { .toList(); for (Doi crossrefDoi : crossrefDois) { ExternalMentionRecord scrapedMention; + UUID id = doiToId.get(crossrefDoi); try { scrapedMention = new CrossrefMention(crossrefDoi).mentionData(); } catch (Exception e) { LOGGER.error("Failed to scrape a Crossref mention with DOI {}", crossrefDoi); RuntimeException exceptionWithMessage = new RuntimeException("Failed to scrape a Crossref mention with DOI " + crossrefDoi, e); - Utils.saveExceptionInDatabase("Crossref mention scraper", "mention", mentionsFailedToScrape.get(crossrefDoi).id(), exceptionWithMessage); + Utils.saveExceptionInDatabase("Crossref mention scraper", "mention", id, exceptionWithMessage); continue; } - Doi doi = scrapedMention.doi(); - RsdMentionIds ids = mentionsFailedToScrape.get(doi); - RsdMentionRecord mentionToUpdate = new RsdMentionRecord(ids.id(), scrapedMention, now); + RsdMentionRecord mentionToUpdate = new RsdMentionRecord(id, scrapedMention, now); try { localMentionRepository.updateMention(mentionToUpdate, false); - mentionsFailedToScrape.remove(doi); + mentionsFailedToScrape.remove(id); } catch (Exception e) { RuntimeException exceptionWithMessage = new RuntimeException("Failed to update a Crossref mention with DOI " + crossrefDoi, e); - Utils.saveExceptionInDatabase("Crossref mention scraper", "mention", ids.id(), exceptionWithMessage); + Utils.saveExceptionInDatabase("Crossref mention scraper", "mention", id, exceptionWithMessage); } } // END CROSSREF // OPENALEX (for European Publication Office DOIs) String email = Config.crossrefContactEmail().orElse(null); - Collection scrapedOpenalexMentions = List.of(); + Collection scrapedOpenalexMentions = new ArrayList<>(); + OpenAlexConnector openAlexConnector = new OpenAlexConnector(); Collection europeanPublicationsOfficeDois = doiToSource.entrySet() .stream() .filter(doiSourceEntry -> doiSourceEntry.getValue().equals("OP")) @@ -125,20 +142,31 @@ public static void main(String[] args) { .map(Doi::fromString) .toList(); try { - scrapedOpenalexMentions = new OpenAlexCitations().mentionData(europeanPublicationsOfficeDois, email); + scrapedOpenalexMentions.addAll(openAlexConnector.mentionDataByDois(europeanPublicationsOfficeDois, email)); } catch (Exception e) { Utils.saveExceptionInDatabase("OpenAlex mention scraper", "mention", null, e); } + Collection openalexIdsToScrape = mentionsToScrape + .stream() + .filter(ids -> ids.doi() == null && ids.openalexId() != null) + .map(RsdMentionIds::openalexId) + .toList(); + try { + scrapedOpenalexMentions.addAll(openAlexConnector.mentionDataByOpenalexIds(openalexIdsToScrape, email)); + } catch (Exception e) { + Utils.saveExceptionInDatabase("OpenAlex mention scraper", "mention", null, e); + } + for (ExternalMentionRecord scrapedMention : scrapedOpenalexMentions) { - Doi doi = scrapedMention.doi(); - RsdMentionIds ids = mentionsFailedToScrape.get(doi); - RsdMentionRecord mentionToUpdate = new RsdMentionRecord(ids.id(), scrapedMention, now); + OpenalexId openalexId = scrapedMention.openalexId(); + UUID id = openalexIdToId.get(openalexId); + RsdMentionRecord mentionToUpdate = new RsdMentionRecord(id, scrapedMention, now); try { localMentionRepository.updateMention(mentionToUpdate, true); - mentionsFailedToScrape.remove(doi); + mentionsFailedToScrape.remove(id); } catch (Exception e) { LOGGER.error("Failed to update an OpenAlex mention with DOI {}", scrapedMention.doi()); - Utils.saveExceptionInDatabase("Mention scraper", "mention", ids.id(), e); + Utils.saveExceptionInDatabase("Mention scraper", "mention", id, e); } } // END OPENALEX diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/OpenAlexCitations.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/OpenAlexConnector.java similarity index 82% rename from scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/OpenAlexCitations.java rename to scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/OpenAlexConnector.java index 1e997b8b8..c9504a972 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/OpenAlexCitations.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/OpenAlexConnector.java @@ -26,19 +26,20 @@ import java.util.stream.Collectors; import java.util.stream.StreamSupport; -class OpenAlexCitations { +class OpenAlexConnector { - private static final Logger LOGGER = LoggerFactory.getLogger(OpenAlexCitations.class); + private static final Logger LOGGER = LoggerFactory.getLogger(OpenAlexConnector.class); static final String DOI_FILTER_URL_UNFORMATTED = "https://api.openalex.org/works?filter=doi:%s"; static final String OPENALEX_ID_URL_UNFORMATTED = "https://api.openalex.org/works?filter=ids.openalex:%s"; - public Collection mentionData(Collection dataciteDois, String email) throws IOException, InterruptedException { - String filter = dataciteDois + public Collection mentionDataByDois(Collection dois, String email) throws IOException, InterruptedException { + String filter = dois .stream() .filter(Objects::nonNull) .map(Doi::toString) .collect(Collectors.joining("|")); + // e.g. https://api.openalex.org/works?filter=doi:10.1038%2Fs41598-024-73248-4|10.5194%2Ftc-2022-249-rc1&per-page=200 String worksUri = DOI_FILTER_URL_UNFORMATTED.formatted(Utils.urlEncode(filter)) + "&per-page=200"; HttpResponse response; @@ -67,6 +68,41 @@ public Collection mentionData(Collection dataciteDoi return mentions; } + public Collection mentionDataByOpenalexIds(Collection openalexIds, String email) throws IOException, InterruptedException { + String filter = openalexIds + .stream() + .filter(Objects::nonNull) + .map(OpenalexId::getOpenalexKey) + .collect(Collectors.joining("|")); + // e.g. https://api.openalex.org/works?filter=ids.openalex:W4402994101|W4319593220&per-page=200 + String worksUri = OPENALEX_ID_URL_UNFORMATTED.formatted(Utils.urlEncode(filter)) + "&per-page=200"; + + HttpResponse response; + if (email == null || email.isBlank()) { + response = Utils.getAsHttpResponse(worksUri); + } else { + response = Utils.getAsHttpResponse(worksUri, "User-Agent", "mailto:" + email); + } + + JsonObject tree = JsonParser.parseString(response.body()).getAsJsonObject(); + JsonArray citationsArray = tree + .getAsJsonArray("results"); + + Collection mentions = new ArrayList<>(); + for (JsonElement citation : citationsArray) { + ExternalMentionRecord citationAsMention; + try { + citationAsMention = parseCitationAsMention(citation); + } catch (RuntimeException e) { + Utils.saveExceptionInDatabase("OpenAlex mention scraper", "mention", null, e); + continue; + } + mentions.add(citationAsMention); + } + + return mentions; + } + public Collection citations(OpenalexId openalexId, Doi doi, String email, UUID id) throws IOException, InterruptedException { // This shouldn't happen, but let's check it to prevent unexpected exceptions: if (doi == null && openalexId == null) { diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/OpenalexId.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/OpenalexId.java index def269a88..03ac67991 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/OpenalexId.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/OpenalexId.java @@ -45,6 +45,10 @@ public String toUrlEncodedString() { return Utils.urlEncode(toString()); } + public String getOpenalexKey() { + return openalexKey; + } + @Override public String toString() { return OPENALEX_ID_BASE + openalexKey; diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestMentionRepository.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestMentionRepository.java index 1b67a7068..1e841adae 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestMentionRepository.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestMentionRepository.java @@ -59,7 +59,7 @@ static RsdMentionIds parseSingleRsdIds(String json) { } public Collection leastRecentlyScrapedMentions(int limit) { - String data = Utils.getAsAdmin(backendUrl + "/mention?doi=not.is.null&order=scraped_at.asc.nullsfirst&select=id,doi,openalex_id&limit=" + limit); + String data = Utils.getAsAdmin(backendUrl + "/mention?or=(doi.not.is.null,openalex_id.not.is.null)&order=scraped_at.asc.nullsfirst&select=id,doi,openalex_id&limit=" + limit); return parseMultipleRsdIds(data); } diff --git a/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/doi/OpenAlexCitationsTest.java b/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/doi/OpenAlexCitationsTest.java index 1750e2de0..9addd5482 100644 --- a/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/doi/OpenAlexCitationsTest.java +++ b/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/doi/OpenAlexCitationsTest.java @@ -22,7 +22,7 @@ void givenLocationWithBackSlashes_whenExtractedAsLocation_thenSlashesUrlEncoded( location.addProperty("landing_page_url", "https://www.example.com/path\\with\\slash"); array.add(location); - URI result = OpenAlexCitations.extractUrlFromLocation(array); + URI result = OpenAlexConnector.extractUrlFromLocation(array); Assertions.assertNotNull(result); Assertions.assertEquals("https://www.example.com/path%5Cwith%5Cslash", result.toString()); From c14a92ed3f77a15de25ba2ca12c01512a2c19a06 Mon Sep 17 00:00:00 2001 From: Ewan Cahen Date: Thu, 10 Oct 2024 11:54:53 +0200 Subject: [PATCH 4/4] docs: add info on OpenAlex to mentions sections --- documentation/docs/01-users/05-adding-software.md | 6 +++--- documentation/docs/01-users/07-adding-projects.md | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/documentation/docs/01-users/05-adding-software.md b/documentation/docs/01-users/05-adding-software.md index bc590b7e2..bdae64f70 100644 --- a/documentation/docs/01-users/05-adding-software.md +++ b/documentation/docs/01-users/05-adding-software.md @@ -164,11 +164,11 @@ This section allows you to add mentions to your software page. You can use this ### Reference papers -Use the *Search* box on the right hand side to find papers by DOI or title. All the relevant data about the publication will be retrieved automatically. A background scraper will use [OpenAlex](https://openalex.org/) to collect all citations of reference papers that have a DOI or an OpenAlex ID. +Use the *Search* box on the right hand side to find papers by DOI, OpenAlex ID or title. All the relevant data about the publication will be retrieved automatically. A background scraper will use [OpenAlex](https://openalex.org/) to collect all citations of reference papers that have a DOI or an OpenAlex ID. ### Citations -These are the citations of the reference papers that the RSD scraper was able to find on [OpenAlex](https://openalex.org/. It can take a few minutes before the citations are harvested. +These are the citations of the reference papers that the RSD scraper was able to find on [OpenAlex](https://openalex.org/). It can take a few minutes before the citations are harvested. :::warning You cannot edit the content of this section. All entries are automatically harvested and generated by the RSD scraper. The mentions found are displayed in the mentions section of the software page. @@ -176,7 +176,7 @@ You cannot edit the content of this section. All entries are automatically harve ### Related output -Here you can add all additional related output. Use search to find papers or other publications by DOI or title. It is also possible to bulk add mentions, that have a DOI (use the *Import* button). On the popup, you can add one DOI per line, with a maximum of 50. After clicking on the *Next* button, we will fetch the data, which can take a moment. When that is done, you will see an overview of the data we fetched, including possible errors, where you can check the data and possibly disable some of the mentions. +Here you can add all additional related output. Use search to find papers or other publications by DOI, OpenAlex ID or title. It is also possible to bulk add mentions, that have a DOI (use the *Import* button). On the popup, you can add one DOI per line, with a maximum of 50. After clicking on the *Next* button, we will fetch the data, which can take a moment. When that is done, you will see an overview of the data we fetched, including possible errors, where you can check the data and possibly disable some of the mentions. ## Testimonials diff --git a/documentation/docs/01-users/07-adding-projects.md b/documentation/docs/01-users/07-adding-projects.md index 41b887e62..a979acf01 100644 --- a/documentation/docs/01-users/07-adding-projects.md +++ b/documentation/docs/01-users/07-adding-projects.md @@ -95,7 +95,7 @@ Here you can add output that was produced by the project itself, such as papers, #### Add output -To add an item, the search bar on the left can be used to search the RSD, [Crossref](https://crossref.org), and [DataCite](https://datacite.org) databases using the **Title** or **DOI** of the research output. An item can be added by selecting it from the list of the search results. The RSD will automatically classify the item based on the available metadata. +To add an item, the search bar on the left can be used to search the RSD, [Crossref](https://www.crossref.org), [DataCite](https://datacite.org) and [OpenAlex](https://openalex.org/) databases using the **Title**, **DOI** or **OpenAlex ID** of the research output. An item can be added by selecting it from the list of the search results. The RSD will automatically classify the item based on the available metadata. #### Import output @@ -124,7 +124,7 @@ Here you can add mentions of your project that cannot be found automatically by #### Search publication -To add an item, the search bar on the left can be used to search the RSD, [Crossref](https://crossref.org), and [DataCite](https://datacite.org) databases using the **Title** or **DOI** of the research output. An item can be added by selecting it from the list of search results. The RSD will automatically classify the item based on the available metadata. +To add an item, the search bar on the left can be used to search the RSD, [Crossref](https://www.crossref.org), [DataCite](https://datacite.org) and [OpenAlex](https://openalex.org/) databases using the **Title**, **DOI** or **OpenAlex ID** of the research impact item. An item can be added by selecting it from the list of search results. The RSD will automatically classify the item based on the available metadata. #### Import publication