From 5c9338af4a579ea726c5acaeb8d06c1b12bd2767 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 11 Apr 2024 00:24:08 +0100 Subject: [PATCH] :bug: Request DOI metadata as CSL-JSON after BibTeX (#1073) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: support CSL-JSON from doi.org too * fix: run built-in cleanup * refactor: rename DOI function * fix: export BibTeX using citation-js * 🔧 Handle totally non-existent dois * 🐛 Ensure citation nodes resolved from CSL-JSON have label --------- Co-authored-by: Franklin Koch Co-authored-by: Rowan Cockett --- .changeset/many-pianos-approve.md | 7 + .changeset/ten-rockets-buy.md | 5 + packages/citation-js-utils/src/index.ts | 113 +++++++++--- .../citation-js-utils/tests/basic.spec.ts | 17 +- .../types/citation-js/index.d.ts | 11 +- .../myst-cli/src/build/utils/bibtex.spec.ts | 35 ---- packages/myst-cli/src/build/utils/bibtex.ts | 30 +-- packages/myst-cli/src/process/citations.ts | 10 +- packages/myst-cli/src/process/file.ts | 4 +- packages/myst-cli/src/transforms/doi.spec.ts | 38 +++- packages/myst-cli/src/transforms/dois.ts | 174 +++++++++++++----- packages/myst-to-jats/src/backmatter.ts | 4 +- packages/mystmd/tests/dois/index.md | 24 +++ packages/mystmd/tests/dois/myst.yml | 14 ++ packages/mystmd/tests/dois/references.bib | 50 +++++ packages/mystmd/tests/endToEnd.spec.ts | 52 +++--- packages/mystmd/tests/exports.yml | 8 + packages/mystmd/tests/outputs/dois.bib | 107 +++++++++++ packages/mystmd/tests/outputs/dois.tex | 54 ++++++ 19 files changed, 575 insertions(+), 182 deletions(-) create mode 100644 .changeset/many-pianos-approve.md create mode 100644 .changeset/ten-rockets-buy.md delete mode 100644 packages/myst-cli/src/build/utils/bibtex.spec.ts create mode 100644 packages/mystmd/tests/dois/index.md create mode 100644 packages/mystmd/tests/dois/myst.yml create mode 100644 packages/mystmd/tests/dois/references.bib create mode 100644 packages/mystmd/tests/outputs/dois.bib create mode 100644 packages/mystmd/tests/outputs/dois.tex diff --git a/.changeset/many-pianos-approve.md b/.changeset/many-pianos-approve.md new file mode 100644 index 000000000..30e2a150f --- /dev/null +++ b/.changeset/many-pianos-approve.md @@ -0,0 +1,7 @@ +--- +"citation-js-utils": minor +"myst-to-jats": patch +"myst-cli": patch +--- + +Load citations from CSL and non-CSL diff --git a/.changeset/ten-rockets-buy.md b/.changeset/ten-rockets-buy.md new file mode 100644 index 000000000..9a2482f8a --- /dev/null +++ b/.changeset/ten-rockets-buy.md @@ -0,0 +1,5 @@ +--- +'mystmd': patch +--- + +Add end-to-end tests for various DOIs diff --git a/packages/citation-js-utils/src/index.ts b/packages/citation-js-utils/src/index.ts index 40448b217..d4e496a2b 100644 --- a/packages/citation-js-utils/src/index.ts +++ b/packages/citation-js-utils/src/index.ts @@ -1,5 +1,5 @@ -import type { OutputOptions } from '@citation-js/core'; import { Cite } from '@citation-js/core'; +import { clean as cleanCSL } from '@citation-js/core/lib/plugins/input/csl.js'; import sanitizeHtml from 'sanitize-html'; import '@citation-js/plugin-bibtex'; @@ -8,10 +8,10 @@ import '@citation-js/plugin-csl'; const DOI_IN_TEXT = /(10.\d{4,9}\/[-._;()/:A-Z0-9]*[A-Z0-9])/i; // This is duplicated in citation-js types, which are not exported -export type CitationJson = { +export type CSL = { type?: 'article-journal' | string; id: string; - author?: { given: string; family: string }[]; + author?: { given: string; family: string; literal?: string }[]; issued?: { 'date-parts'?: number[][]; literal?: string }; publisher?: string; title?: string; @@ -47,14 +47,6 @@ function cleanRef(citation: string) { return cleanHtml.replace(/^1\./g, '').replace(/&/g, '&').trim(); } -// eslint-disable-next-line @typescript-eslint/no-unused-vars -const defaultOpts: OutputOptions = { - format: 'string', - type: 'json', - style: 'ris', - lang: 'en-US', -}; - export enum CitationJSStyles { 'apa' = 'citation-apa', 'vancouver' = 'citation-vancouver', @@ -66,14 +58,7 @@ export enum InlineCite { 't' = 't', } -const defaultString: OutputOptions = { - format: 'string', - lang: 'en-US', - type: 'html', - style: CitationJSStyles.apa, -}; - -export function yearFromCitation(data: CitationJson) { +export function yearFromCitation(data: CSL) { let year: number | string | undefined = data.issued?.['date-parts']?.[0]?.[0]; if (year) return year; year = data.issued?.['literal']?.match(/\b[12][0-9]{3}\b/)?.[0]; @@ -81,7 +66,7 @@ export function yearFromCitation(data: CitationJson) { return 'n.d.'; } -export function getInlineCitation(data: CitationJson, kind: InlineCite, opts?: InlineOptions) { +export function getInlineCitation(data: CSL, kind: InlineCite, opts?: InlineOptions) { let authors = data.author; if (!authors || authors.length === 0) { authors = data.editor; @@ -112,7 +97,7 @@ export function getInlineCitation(data: CitationJson, kind: InlineCite, opts?: I } if (authors.length > 2) { return [ - { type: 'text', value: `${prefix}${authors[0].family} ` }, + { type: 'text', value: `${prefix}${authors[0].family ?? authors[0].literal} ` }, { type: 'emphasis', children: [{ type: 'text', value: 'et al.' }] }, { type: 'text', value: `${yearPart}` }, ]; @@ -129,7 +114,9 @@ export type CitationRenderer = Record< inline: (kind?: InlineCite, opts?: InlineOptions) => InlineNode[]; getDOI: () => string | undefined; getURL: () => string | undefined; - cite: CitationJson; + cite: CSL; + getLabel: () => string; + exportBibTeX: () => string; } >; @@ -171,12 +158,58 @@ export function firstNonDoiUrl(str?: string, doi?: string) { return matches.map((match) => match[0]).find((match) => !doi || !match.includes(doi)); } +/** + * Parse a citation style of the form `citation-