diff --git a/CHANGELOG.md b/CHANGELOG.md index 12b8f3c24a9..ea79b7a3dc9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv - We added ability to push entries to TeXworks. [#3197](https://github.com/JabRef/jabref/issues/3197) - We added the ability to zoom in and out in the document viewer using Ctrl + Scroll. [#10964](https://github.com/JabRef/jabref/pull/10964) - We added a Cleanup for removing non-existent files and grouped the related options [#10929](https://github.com/JabRef/jabref/issues/10929) +- We added the functionality to parse the bibliography of PDFs using the GROBID online service. [#10200](https://github.com/JabRef/jabref/issues/10200) ### Changed diff --git a/src/main/java/org/jabref/gui/actions/StandardActions.java b/src/main/java/org/jabref/gui/actions/StandardActions.java index 6f74e2caa1f..949dac28b29 100644 --- a/src/main/java/org/jabref/gui/actions/StandardActions.java +++ b/src/main/java/org/jabref/gui/actions/StandardActions.java @@ -32,6 +32,7 @@ public enum StandardActions implements Action { REBUILD_FULLTEXT_SEARCH_INDEX(Localization.lang("Rebuild fulltext search index"), IconTheme.JabRefIcons.FILE), REDOWNLOAD_MISSING_FILES(Localization.lang("Redownload missing files"), IconTheme.JabRefIcons.DOWNLOAD), OPEN_EXTERNAL_FILE(Localization.lang("Open file"), IconTheme.JabRefIcons.FILE, KeyBinding.OPEN_FILE), + EXTRACT_FILE_REFERENCES(Localization.lang("Extract references from file"), IconTheme.JabRefIcons.FILE_STAR), OPEN_URL(Localization.lang("Open URL or DOI"), IconTheme.JabRefIcons.WWW, KeyBinding.OPEN_URL_OR_DOI), SEARCH_SHORTSCIENCE(Localization.lang("Search ShortScience")), MERGE_WITH_FETCHED_ENTRY(Localization.lang("Get bibliographic data from %0", "DOI/ISBN/...")), diff --git a/src/main/java/org/jabref/gui/icon/IconTheme.java b/src/main/java/org/jabref/gui/icon/IconTheme.java index 470357981af..cc9bad0528b 100644 --- 
a/src/main/java/org/jabref/gui/icon/IconTheme.java
+++ b/src/main/java/org/jabref/gui/icon/IconTheme.java
@@ -193,6 +193,7 @@ public enum JabRefIcons implements JabRefIcon {
         DELETE_ENTRY(MaterialDesignD.DELETE),
         SEARCH(MaterialDesignM.MAGNIFY),
         FILE_SEARCH(MaterialDesignF.FILE_FIND),
+        FILE_STAR(MaterialDesignF.FILE_STAR),
         PDF_METADATA_READ(MaterialDesignF.FORMAT_ALIGN_TOP),
         PDF_METADATA_WRITE(MaterialDesignF.FORMAT_ALIGN_BOTTOM),
         ADVANCED_SEARCH(Color.CYAN, MaterialDesignM.MAGNIFY),
diff --git a/src/main/java/org/jabref/gui/maintable/ExtractReferencesAction.java b/src/main/java/org/jabref/gui/maintable/ExtractReferencesAction.java
new file mode 100644
index 00000000000..d46854c5f55
--- /dev/null
+++ b/src/main/java/org/jabref/gui/maintable/ExtractReferencesAction.java
@@ -0,0 +1,122 @@
+package org.jabref.gui.maintable;
+
+import java.nio.file.Path;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.concurrent.Callable;
+
+import org.jabref.gui.DialogService;
+import org.jabref.gui.StateManager;
+import org.jabref.gui.actions.ActionHelper;
+import org.jabref.gui.actions.SimpleCommand;
+import org.jabref.gui.importer.ImportEntriesDialog;
+import org.jabref.gui.util.BackgroundTask;
+import org.jabref.gui.util.TaskExecutor;
+import org.jabref.logic.importer.ParserResult;
+import org.jabref.logic.importer.util.GrobidService;
+import org.jabref.logic.l10n.Localization;
+import org.jabref.logic.util.io.FileUtil;
+import org.jabref.model.entry.BibEntry;
+import org.jabref.model.entry.LinkedFile;
+import org.jabref.preferences.PreferencesService;
+
+/**
+ * Sends the files linked to the selected entries (or to one given entry) to GROBID
+ * and offers the references extracted from them in an import dialog.
+ */
+public class ExtractReferencesAction extends SimpleCommand {
+    /** Number of files above which the user has to confirm before processing starts. */
+    private static final int FILES_LIMIT = 10;
+
+    private final DialogService dialogService;
+    private final StateManager stateManager;
+    private final PreferencesService preferencesService;
+    private final BibEntry entry;
+    private final LinkedFile linkedFile;
+    private final TaskExecutor taskExecutor;
+
+    /**
+     * Creates an action that works on the entries currently selected according to the state manager.
+     */
+    public ExtractReferencesAction(DialogService dialogService,
+                                   StateManager stateManager,
+                                   PreferencesService preferencesService,
+                                   TaskExecutor taskExecutor) {
+        this(dialogService, stateManager, preferencesService, null, null, taskExecutor);
+    }
+
+    /**
+     * Creates an action for one entry.
+     *
+     * @param entry      entry whose linked files are processed; if {@code null}, the selected entries are used
+     * @param linkedFile if {@code null}, the action is executable only while entries with linked files are
+     *                   selected and GROBID is enabled; otherwise it is always executable
+     */
+    public ExtractReferencesAction(DialogService dialogService,
+                                   StateManager stateManager,
+                                   PreferencesService preferencesService,
+                                   BibEntry entry,
+                                   LinkedFile linkedFile,
+                                   TaskExecutor taskExecutor) {
+        this.dialogService = dialogService;
+        this.stateManager = stateManager;
+        this.preferencesService = preferencesService;
+        this.entry = entry;
+        this.linkedFile = linkedFile;
+        this.taskExecutor = taskExecutor;
+
+        if (this.linkedFile == null) {
+            this.executable.bind(
+                    ActionHelper.needsEntriesSelected(stateManager)
+                                .and(ActionHelper.hasLinkedFileForSelectedEntries(stateManager))
+                                .and(this.preferencesService.getGrobidPreferences().grobidEnabledProperty())
+            );
+        } else {
+            this.setExecutable(true);
+        }
+    }
+
+    @Override
+    public void execute() {
+        extractReferences();
+    }
+
+    /**
+     * Collects the linked files of the relevant entries, asks for confirmation if there are more than
+     * {@link #FILES_LIMIT} of them, and opens the import dialog with a background task that queries GROBID.
+     * Does nothing if there is no active database.
+     */
+    private void extractReferences() {
+        stateManager.getActiveDatabase().ifPresent(databaseContext -> {
+            List<BibEntry> selectedEntries = new LinkedList<>();
+            if (entry == null) {
+                selectedEntries = stateManager.getSelectedEntries();
+            } else {
+                selectedEntries.add(entry);
+            }
+
+            List<Path> fileList = FileUtil.getListOfLinkedFiles(selectedEntries, databaseContext.getFileDirectories(preferencesService.getFilePreferences()));
+            if (fileList.size() > FILES_LIMIT) {
+                boolean continueOpening = dialogService.showConfirmationDialogAndWait(Localization.lang("Processing a large number of files"),
+                        Localization.lang("You are about to process %0 files. Continue?", fileList.size()),
+                        Localization.lang("Continue"), Localization.lang("Cancel"));
+                if (!continueOpening) {
+                    return;
+                }
+            }
+
+            Callable<ParserResult> parserResultCallable = () -> new ParserResult(
+                    new GrobidService(this.preferencesService.getGrobidPreferences()).processReferences(fileList, preferencesService.getImportFormatPreferences())
+            );
+            BackgroundTask<ParserResult> task = BackgroundTask.wrap(parserResultCallable)
+                                                              .withInitialMessage(Localization.lang("Processing PDF(s)"));
+
+            task.onFailure(dialogService::showErrorDialogAndWait);
+
+            // Reuse the context passed to this lambda instead of unwrapping the Optional a second time
+            ImportEntriesDialog dialog = new ImportEntriesDialog(databaseContext, task);
+            dialog.setTitle(Localization.lang("Extract References"));
+            dialogService.showCustomDialogAndWait(dialog);
+        });
+    }
+}
diff --git a/src/main/java/org/jabref/gui/maintable/RightClickMenu.java b/src/main/java/org/jabref/gui/maintable/RightClickMenu.java
index 6e65da6ab13..aa8a5477902 100644
--- a/src/main/java/org/jabref/gui/maintable/RightClickMenu.java
+++ b/src/main/java/org/jabref/gui/maintable/RightClickMenu.java
@@ -75,6 +75,7 @@ public static ContextMenu create(BibEntryTableViewModel entry,
                 factory.createMenuItem(StandardActions.ATTACH_FILE_FROM_URL, new AttachFileFromURLAction(dialogService, stateManager, taskExecutor, preferencesService)),
                 factory.createMenuItem(StandardActions.OPEN_FOLDER, new OpenFolderAction(dialogService, stateManager, preferencesService, taskExecutor)),
                 factory.createMenuItem(StandardActions.OPEN_EXTERNAL_FILE, new OpenExternalFileAction(dialogService, stateManager, preferencesService, taskExecutor)),
+                factory.createMenuItem(StandardActions.EXTRACT_FILE_REFERENCES, new ExtractReferencesAction(dialogService, stateManager, preferencesService, taskExecutor)),
                 factory.createMenuItem(StandardActions.OPEN_URL, new OpenUrlAction(dialogService, stateManager, preferencesService)),
                 factory.createMenuItem(StandardActions.SEARCH_SHORTSCIENCE, new SearchShortScienceAction(dialogService, stateManager, 
preferencesService)),
 
diff --git a/src/main/java/org/jabref/logic/importer/util/GrobidService.java b/src/main/java/org/jabref/logic/importer/util/GrobidService.java
index 514e43d0513..f3449f68be0 100644
--- a/src/main/java/org/jabref/logic/importer/util/GrobidService.java
+++ b/src/main/java/org/jabref/logic/importer/util/GrobidService.java
@@ -3,6 +3,8 @@
 import java.io.IOException;
+import java.io.InputStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Optional;
 
@@ -84,6 +86,50 @@ public List<BibEntry> processPDF(Path filePath, ImportFormatPreferences importFo
 
         String httpResponse = response.body();
 
+        return getBibEntries(importFormatPreferences, httpResponse);
+    }
+
+    /**
+     * Extracts the references of several files by querying GROBID once per file, in list order.
+     *
+     * @return the entries of all files in a single list; empty if {@code pathList} is empty
+     * @throws IOException if processing any file fails; entries collected up to then are not returned
+     */
+    public List<BibEntry> processReferences(List<Path> pathList, ImportFormatPreferences importFormatPreferences) throws IOException, ParseException {
+        List<BibEntry> entries = new ArrayList<>();
+        for (Path filePath : pathList) {
+            entries.addAll(processReferences(filePath, importFormatPreferences));
+        }
+
+        return entries;
+    }
+
+    /**
+     * Uploads the given file to the {@code /api/processReferences} endpoint of the configured GROBID server.
+     *
+     * @param filePath the file to upload
+     * @return the entries obtained from the response body
+     * @throws IOException if the file cannot be opened, the request fails, or the response is empty
+     */
+    public List<BibEntry> processReferences(Path filePath, ImportFormatPreferences importFormatPreferences) throws IOException, ParseException {
+        String httpResponse;
+        // jsoup leaves closing the uploaded stream to the caller, so close it once the request is done
+        try (InputStream pdfStream = Files.newInputStream(filePath)) {
+            httpResponse = Jsoup.connect(grobidPreferences.getGrobidURL() + "/api/processReferences")
+                                .header("Accept", MediaTypes.APPLICATION_BIBTEX)
+                                .data("input", filePath.toString(), pdfStream)
+                                .data("consolidateCitations", String.valueOf(ConsolidateCitations.WITH_METADATA))
+                                .method(Connection.Method.POST)
+                                .ignoreContentType(true)
+                                .timeout(20000)
+                                .execute()
+                                .body();
+        }
+
+        return getBibEntries(importFormatPreferences, httpResponse);
+    }
+
+    private static List<BibEntry> getBibEntries(ImportFormatPreferences importFormatPreferences, String httpResponse) throws IOException, ParseException {
         if (httpResponse == null || "@misc{-1,\n author = {}\n}\n".equals(httpResponse)) { // This filters empty BibTeX entries
             throw new IOException("The GROBID server response does not 
contain anything.");
         }
diff --git a/src/main/resources/l10n/JabRef_en.properties b/src/main/resources/l10n/JabRef_en.properties
index 2b05bedc01a..9b6b0a31c45 100644
--- a/src/main/resources/l10n/JabRef_en.properties
+++ b/src/main/resources/l10n/JabRef_en.properties
@@ -311,6 +311,12 @@ Export\ preferences\ to\ file=Export preferences to file
 Export\ to\ clipboard=Export to clipboard
 Export\ to\ text\ file.=Export to text file.
 
+Extract\ references\ from\ file=Extract references from file
+Extract\ References=Extract References
+Processing\ PDF(s)=Processing PDF(s)
+Processing\ a\ large\ number\ of\ files=Processing a large number of files
+You\ are\ about\ to\ process\ %0\ files.\ Continue?=You are about to process %0 files. Continue?
+
 Exporting\ %0=Exporting %0
 Could\ not\ export\ file\ '%0'\ (reason\:\ %1)=Could not export file '%0' (reason: %1)
 Unknown\ export\ format\ %0=Unknown export format %0
diff --git a/src/test/java/org/jabref/logic/importer/util/GrobidServiceTest.java b/src/test/java/org/jabref/logic/importer/util/GrobidServiceTest.java
index 9898ec9fba2..daca2f700c2 100644
--- a/src/test/java/org/jabref/logic/importer/util/GrobidServiceTest.java
+++ b/src/test/java/org/jabref/logic/importer/util/GrobidServiceTest.java
@@ -4,6 +4,7 @@
 import java.net.URISyntaxException;
 import java.nio.file.Path;
 import java.util.List;
+import java.util.Objects;
 import java.util.Optional;
 
 import org.jabref.logic.importer.ImportFormatPreferences;
@@ -100,4 +101,25 @@ public void processPdfTest() throws IOException, ParseException, URISyntaxExcept
         // assertEquals(Optional.of("Paper Title"), be0.getField(StandardField.TITLE));
         // assertEquals(Optional.of("2014-10-05"), be0.getField(StandardField.DATE));
     }
+
+    /**
+     * Passes {@code LNCS-minimal.pdf} to {@code processReferences} and expects exactly one extracted reference.
+     */
+    @Test
+    public void extractsReferencesFromPdf() throws IOException, ParseException, URISyntaxException {
+        BibEntry ref1 = new BibEntry(StandardEntryType.Article)
+                .withField(StandardField.AUTHOR, "Kopp, O")
+                .withField(StandardField.ADDRESS, "Berlin Heidelberg")
+                .withField(StandardField.DATE, "2013")
+                .withField(StandardField.JOURNAL, "All links were last followed on October")
+                .withField(StandardField.PAGES, "700--704")
+                .withField(StandardField.PUBLISHER, "Springer")
+                .withField(StandardField.TITLE, "Winery -A Modeling Tool for TOSCA-based Cloud Applications")
+                .withField(StandardField.VOLUME, "8274")
+                .withField(StandardField.YEAR, "2013");
+
+        Path file = Path.of(Objects.requireNonNull(PdfGrobidImporterTest.class.getResource("LNCS-minimal.pdf")).toURI());
+        List<BibEntry> extractedReferences = grobidService.processReferences(file, importFormatPreferences);
+        assertEquals(List.of(ref1), extractedReferences);
+    }
 }
diff --git a/src/test/resources/pdfs/IEEE/.gitignore b/src/test/resources/pdfs/IEEE/.gitignore
new file mode 100644
index 00000000000..0509c598856
--- /dev/null
+++ b/src/test/resources/pdfs/IEEE/.gitignore
@@ -0,0 +1,314 @@
+# Created by https://www.toptal.com/developers/gitignore/api/latex
+# Edit at https://www.toptal.com/developers/gitignore?templates=latex
+
+### LaTeX ###
+## Core latex/pdflatex auxiliary files:
+*.aux
+*.lof
+*.log
+*.lot
+*.fls
+*.out
+*.toc
+*.fmt
+*.fot
+*.cb
+*.cb2
+.*.lb
+
+## Intermediate documents:
+*.dvi
+*.xdv
+*-converted-to.*
+# these rules might exclude image files for figures etc.
+# *.ps +# *.eps +# *.pdf + +## Generated if empty string is given at "Please type another file name for output:" +.pdf + +## Bibliography auxiliary files (bibtex/biblatex/biber): +*.bbl +*.bcf +*.blg +*-blx.aux +*-blx.bib +*.run.xml + +## Build tool auxiliary files: +*.fdb_latexmk +*.synctex +*.synctex(busy) +*.synctex.gz +*.synctex.gz(busy) +*.pdfsync + +## Build tool directories for auxiliary files +# latexrun +latex.out/ + +## Auxiliary and intermediate files from other packages: +# algorithms +*.alg +*.loa + +# achemso +acs-*.bib + +# amsthm +*.thm + +# beamer +*.nav +*.pre +*.snm +*.vrb + +# changes +*.soc + +# comment +*.cut + +# cprotect +*.cpt + +# elsarticle (documentclass of Elsevier journals) +*.spl + +# endnotes +*.ent + +# fixme +*.lox + +# feynmf/feynmp +*.mf +*.mp +*.t[1-9] +*.t[1-9][0-9] +*.tfm + +#(r)(e)ledmac/(r)(e)ledpar +*.end +*.?end +*.[1-9] +*.[1-9][0-9] +*.[1-9][0-9][0-9] +*.[1-9]R +*.[1-9][0-9]R +*.[1-9][0-9][0-9]R +*.eledsec[1-9] +*.eledsec[1-9]R +*.eledsec[1-9][0-9] +*.eledsec[1-9][0-9]R +*.eledsec[1-9][0-9][0-9] +*.eledsec[1-9][0-9][0-9]R + +# glossaries +*.acn +*.acr +*.glg +*.glo +*.gls +*.glsdefs +*.lzo +*.lzs +*.slg +*.slo +*.sls + +# uncomment this for glossaries-extra (will ignore makeindex's style files!) 
+# *.ist + +# gnuplot +*.gnuplot +*.table + +# gnuplottex +*-gnuplottex-* + +# gregoriotex +*.gaux +*.glog +*.gtex + +# htlatex +*.4ct +*.4tc +*.idv +*.lg +*.trc +*.xref + +# hyperref +*.brf + +# knitr +*-concordance.tex +# TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files +# *.tikz +*-tikzDictionary + +# listings +*.lol + +# luatexja-ruby +*.ltjruby + +# makeidx +*.idx +*.ilg +*.ind + +# minitoc +*.maf +*.mlf +*.mlt +*.mtc[0-9]* +*.slf[0-9]* +*.slt[0-9]* +*.stc[0-9]* + +# minted +_minted* +*.pyg + +# morewrites +*.mw + +# newpax +*.newpax + +# nomencl +*.nlg +*.nlo +*.nls + +# pax +*.pax + +# pdfpcnotes +*.pdfpc + +# sagetex +*.sagetex.sage +*.sagetex.py +*.sagetex.scmd + +# scrwfile +*.wrt + +# svg +svg-inkscape/ + +# sympy +*.sout +*.sympy +sympy-plots-for-*.tex/ + +# pdfcomment +*.upa +*.upb + +# pythontex +*.pytxcode +pythontex-files-*/ + +# tcolorbox +*.listing + +# thmtools +*.loe + +# TikZ & PGF +*.dpth +*.md5 +*.auxlock + +# titletoc +*.ptc + +# todonotes +*.tdo + +# vhistory +*.hst +*.ver + +# easy-todo +*.lod + +# xcolor +*.xcp + +# xmpincl +*.xmpi + +# xindy +*.xdy + +# xypic precompiled matrices and outlines +*.xyc +*.xyd + +# endfloat +*.ttt +*.fff + +# Latexian +TSWLatexianTemp* + +## Editors: +# WinEdt +*.bak +*.sav + +# Texpad +.texpadtmp + +# LyX +*.lyx~ + +# Kile +*.backup + +# gummi +.*.swp + +# KBibTeX +*~[0-9]* + +# TeXnicCenter +*.tps + +# auto folder when using emacs and auctex +./auto/* +*.el + +# expex forward references with \gathertags +*-tags.tex + +# standalone packages +*.sta + +# Makeindex log files +*.lpz + +# xwatermark package +*.xwm + +# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib +# option is specified. Footnotes are the stored in a file with suffix Notes.bib. +# Uncomment the next line to have this generated file ignored. 
+#*Notes.bib + +### LaTeX Patch ### +# LIPIcs / OASIcs +*.vtc + +# glossaries +*.glstex + +# End of https://www.toptal.com/developers/gitignore/api/latex diff --git a/src/test/resources/pdfs/IEEE/extract-references-test.bib b/src/test/resources/pdfs/IEEE/extract-references-test.bib new file mode 100644 index 00000000000..eaddd5e3f66 --- /dev/null +++ b/src/test/resources/pdfs/IEEE/extract-references-test.bib @@ -0,0 +1,8 @@ +@Article{, + title = {JabRef Example for References Parsing}, + file = {:ieee-paper.pdf:PDF}, +} + +@Comment{jabref-meta: databaseType:bibtex;} + +@Comment{jabref-meta: fileDirectory:.;} diff --git a/src/test/resources/pdfs/IEEE/ieee-paper.bib b/src/test/resources/pdfs/IEEE/ieee-paper.bib new file mode 100644 index 00000000000..38533080cc0 --- /dev/null +++ b/src/test/resources/pdfs/IEEE/ieee-paper.bib @@ -0,0 +1,51 @@ +@Article{Alver2007, + author = {Alver, Morten Omholt and Tenn{\o}y, Torodd and Alfredsen, Jo Arve and {\O}ie, Gunvor}, + journal = {Aquacultural engineering}, + title = {Automatic measurement of rotifer Brachionus plicatilis densities in first feeding tanks}, + year = {2007}, + number = {2}, + pages = {115--121}, + volume = {36}, + publisher = {Elsevier}, +} + +@Article{Alver2007a, + author = {Alver, Morten Omholt and others}, + journal = {Aquaculture}, + title = {Estimating larval density in cod (Gadus morhua) first feeding tanks using measurements of feed density and larval growth rates}, + year = {2007}, + number = {1}, + pages = {216--226}, + volume = {268}, + publisher = {Elsevier}, +} + +@InProceedings{Kopp2018, + author = {Kopp, Oliver and Armbruster, Anita and Zimmermann, Olaf}, + booktitle = {ZEUS}, + title = {Markdown Architectural Decision Records: Format and Tool Support}, + year = {2018}, + publisher = {CEUR-WS.org}, +} + +@InProceedings{Kopp2012, + author = {Oliver Kopp and others}, + booktitle = {Business Process Model and Notation}, + title = {{BPMN4TOSCA:} {A} Domain-Specific Language to Model Management Plans 
for Composite Applications}, + year = {2012}, + publisher = {Springer}, + series = {LNCS}, + volume = {125}, + doi = {10.1007/978-3-642-33155-8_4}, +} + +@InProceedings{Koenig2023, + author = {Simone König and others}, + booktitle = {INDIN}, + title = {{BPMN4Cars}: A Car-Tailored Workflow Engine}, + year = {2023}, + publisher = {IEEE}, + doi = {10.1109/indin51400.2023.10218082}, +} + +@Comment{jabref-meta: databaseType:bibtex;} diff --git a/src/test/resources/pdfs/IEEE/ieee-paper.pdf b/src/test/resources/pdfs/IEEE/ieee-paper.pdf new file mode 100644 index 00000000000..408d16f8254 Binary files /dev/null and b/src/test/resources/pdfs/IEEE/ieee-paper.pdf differ diff --git a/src/test/resources/pdfs/IEEE/ieee-paper.tex b/src/test/resources/pdfs/IEEE/ieee-paper.tex new file mode 100644 index 00000000000..62de1f28d8d --- /dev/null +++ b/src/test/resources/pdfs/IEEE/ieee-paper.tex @@ -0,0 +1,39 @@ +\documentclass[conference,a4paper,english]{IEEEtran}[2015/08/26] + +\usepackage{lipsum} +\usepackage{hyperref} +\usepackage[keeplastbox]{flushend} + +\begin{document} +\title{JabRef Example for Reference Parsing} +\author{% + \IEEEauthorblockN{First Author} + \IEEEauthorblockA{University of Examples, Germany\\ + \{lastname\}@example.org} +} + +\maketitle + +\begin{abstract} +\lipsum[1] +\end{abstract} + +\section{Introduction} +\lipsum[2] + +\section{Related Work} +\lipsum[3] +\cite{Alver2007,Alver2007a,Kopp2012,Kopp2018,Koenig2023} + +\section{Contribution} +\lipsum[4-7] + +\section{Conclusion and Outlook} +\lipsum[4] + +\atColsEnd{\vfil} + +\bibliographystyle{IEEEtran} +\bibliography{IEEEabrv,ieee-paper} + +\end{document}