diff --git a/CHANGELOG.md b/CHANGELOG.md
index 12b8f3c24a9..ea79b7a3dc9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -28,6 +28,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
- We added ability to push entries to TeXworks. [#3197](https://github.com/JabRef/jabref/issues/3197)
- We added the ability to zoom in and out in the document viewer using Ctrl + Scroll. [#10964](https://github.com/JabRef/jabref/pull/10964)
- We added a Cleanup for removing non-existent files and grouped the related options [#10929](https://github.com/JabRef/jabref/issues/10929)
+- We added the functionality to parse the bibliography of PDFs using the GROBID online service. [#10200](https://github.com/JabRef/jabref/issues/10200)
### Changed
diff --git a/src/main/java/org/jabref/gui/actions/StandardActions.java b/src/main/java/org/jabref/gui/actions/StandardActions.java
index 6f74e2caa1f..949dac28b29 100644
--- a/src/main/java/org/jabref/gui/actions/StandardActions.java
+++ b/src/main/java/org/jabref/gui/actions/StandardActions.java
@@ -32,6 +32,7 @@ public enum StandardActions implements Action {
REBUILD_FULLTEXT_SEARCH_INDEX(Localization.lang("Rebuild fulltext search index"), IconTheme.JabRefIcons.FILE),
REDOWNLOAD_MISSING_FILES(Localization.lang("Redownload missing files"), IconTheme.JabRefIcons.DOWNLOAD),
OPEN_EXTERNAL_FILE(Localization.lang("Open file"), IconTheme.JabRefIcons.FILE, KeyBinding.OPEN_FILE),
+ EXTRACT_FILE_REFERENCES(Localization.lang("Extract references from file"), IconTheme.JabRefIcons.FILE_STAR),
OPEN_URL(Localization.lang("Open URL or DOI"), IconTheme.JabRefIcons.WWW, KeyBinding.OPEN_URL_OR_DOI),
SEARCH_SHORTSCIENCE(Localization.lang("Search ShortScience")),
MERGE_WITH_FETCHED_ENTRY(Localization.lang("Get bibliographic data from %0", "DOI/ISBN/...")),
diff --git a/src/main/java/org/jabref/gui/icon/IconTheme.java b/src/main/java/org/jabref/gui/icon/IconTheme.java
index 470357981af..cc9bad0528b 100644
--- a/src/main/java/org/jabref/gui/icon/IconTheme.java
+++ b/src/main/java/org/jabref/gui/icon/IconTheme.java
@@ -193,6 +193,7 @@ public enum JabRefIcons implements JabRefIcon {
DELETE_ENTRY(MaterialDesignD.DELETE),
SEARCH(MaterialDesignM.MAGNIFY),
FILE_SEARCH(MaterialDesignF.FILE_FIND),
+ FILE_STAR(MaterialDesignF.FILE_STAR),
PDF_METADATA_READ(MaterialDesignF.FORMAT_ALIGN_TOP),
PDF_METADATA_WRITE(MaterialDesignF.FORMAT_ALIGN_BOTTOM),
ADVANCED_SEARCH(Color.CYAN, MaterialDesignM.MAGNIFY),
diff --git a/src/main/java/org/jabref/gui/maintable/ExtractReferencesAction.java b/src/main/java/org/jabref/gui/maintable/ExtractReferencesAction.java
new file mode 100644
index 00000000000..d46854c5f55
--- /dev/null
+++ b/src/main/java/org/jabref/gui/maintable/ExtractReferencesAction.java
@@ -0,0 +1,101 @@
+package org.jabref.gui.maintable;
+
+import java.nio.file.Path;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.concurrent.Callable;
+
+import org.jabref.gui.DialogService;
+import org.jabref.gui.StateManager;
+import org.jabref.gui.actions.ActionHelper;
+import org.jabref.gui.actions.SimpleCommand;
+import org.jabref.gui.importer.ImportEntriesDialog;
+import org.jabref.gui.util.BackgroundTask;
+import org.jabref.gui.util.TaskExecutor;
+import org.jabref.logic.importer.ParserResult;
+import org.jabref.logic.importer.util.GrobidService;
+import org.jabref.logic.l10n.Localization;
+import org.jabref.logic.util.io.FileUtil;
+import org.jabref.model.entry.BibEntry;
+import org.jabref.model.entry.LinkedFile;
+import org.jabref.preferences.PreferencesService;
+
+/**
+ * Command that extracts the bibliography of linked files via {@link GrobidService} and offers the
+ * resulting entries in an {@link ImportEntriesDialog}.
+ * <p>
+ * The command either works on the entries currently selected in the {@link StateManager} (when no
+ * entry is passed in) or on one explicitly given entry.
+ */
+public class ExtractReferencesAction extends SimpleCommand {
+    /** Number of files above which the user is asked for confirmation before processing starts. */
+    private static final int FILES_LIMIT = 10;
+
+    private final DialogService dialogService;
+    private final StateManager stateManager;
+    private final PreferencesService preferencesService;
+    private final BibEntry entry;
+    private final LinkedFile linkedFile;
+    private final TaskExecutor taskExecutor;
+
+    /**
+     * Creates a command that operates on the entries selected in the given state manager.
+     */
+    public ExtractReferencesAction(DialogService dialogService,
+                                   StateManager stateManager,
+                                   PreferencesService preferencesService,
+                                   TaskExecutor taskExecutor) {
+        this(dialogService, stateManager, preferencesService, null, null, taskExecutor);
+    }
+
+    /**
+     * Creates a command for one specific entry, or for the current selection if {@code entry} is {@code null}.
+     *
+     * @param entry      the entry whose linked files are processed; {@code null} means "use the selected entries"
+     * @param linkedFile if {@code null}, executability is bound to the selection state and the GROBID-enabled
+     *                   preference; otherwise the command is always executable
+     */
+    public ExtractReferencesAction(DialogService dialogService,
+                                   StateManager stateManager,
+                                   PreferencesService preferencesService,
+                                   BibEntry entry,
+                                   LinkedFile linkedFile,
+                                   TaskExecutor taskExecutor) {
+        this.dialogService = dialogService;
+        this.stateManager = stateManager;
+        this.preferencesService = preferencesService;
+        this.entry = entry;
+        this.linkedFile = linkedFile;
+        this.taskExecutor = taskExecutor;
+
+        if (this.linkedFile == null) {
+            this.executable.bind(
+                    ActionHelper.needsEntriesSelected(stateManager)
+                                .and(ActionHelper.hasLinkedFileForSelectedEntries(stateManager))
+                                .and(this.preferencesService.getGrobidPreferences().grobidEnabledProperty())
+            );
+        } else {
+            this.setExecutable(true);
+        }
+    }
+
+    @Override
+    public void execute() {
+        extractReferences();
+    }
+
+    /**
+     * Collects the linked files of the relevant entries, asks for confirmation if there are more than
+     * {@link #FILES_LIMIT} of them, and opens the import dialog with a background task that queries GROBID.
+     * Does nothing if no database is active.
+     */
+    private void extractReferences() {
+        stateManager.getActiveDatabase().ifPresent(databaseContext -> {
+            List<BibEntry> selectedEntries;
+            if (entry == null) {
+                selectedEntries = stateManager.getSelectedEntries();
+            } else {
+                selectedEntries = new LinkedList<>();
+                selectedEntries.add(entry);
+            }
+
+            List<Path> fileList = FileUtil.getListOfLinkedFiles(selectedEntries, databaseContext.getFileDirectories(preferencesService.getFilePreferences()));
+            if (fileList.size() > FILES_LIMIT) {
+                boolean continueProcessing = dialogService.showConfirmationDialogAndWait(Localization.lang("Processing a large number of files"),
+                        Localization.lang("You are about to process %0 files. Continue?", fileList.size()),
+                        Localization.lang("Continue"), Localization.lang("Cancel"));
+                if (!continueProcessing) {
+                    return;
+                }
+            }
+
+            Callable<ParserResult> parserResultCallable = () -> new ParserResult(
+                    new GrobidService(this.preferencesService.getGrobidPreferences()).processReferences(fileList, preferencesService.getImportFormatPreferences())
+            );
+            BackgroundTask<ParserResult> task = BackgroundTask.wrap(parserResultCallable)
+                                                              .withInitialMessage(Localization.lang("Processing PDF(s)"));
+
+            task.onFailure(dialogService::showErrorDialogAndWait);
+
+            // Reuse the context supplied by ifPresent instead of re-querying the Optional with an unchecked get()
+            ImportEntriesDialog dialog = new ImportEntriesDialog(databaseContext, task);
+            dialog.setTitle(Localization.lang("Extract References"));
+            dialogService.showCustomDialogAndWait(dialog);
+        });
+    }
+}
diff --git a/src/main/java/org/jabref/gui/maintable/RightClickMenu.java b/src/main/java/org/jabref/gui/maintable/RightClickMenu.java
index 6e65da6ab13..aa8a5477902 100644
--- a/src/main/java/org/jabref/gui/maintable/RightClickMenu.java
+++ b/src/main/java/org/jabref/gui/maintable/RightClickMenu.java
@@ -75,6 +75,7 @@ public static ContextMenu create(BibEntryTableViewModel entry,
factory.createMenuItem(StandardActions.ATTACH_FILE_FROM_URL, new AttachFileFromURLAction(dialogService, stateManager, taskExecutor, preferencesService)),
factory.createMenuItem(StandardActions.OPEN_FOLDER, new OpenFolderAction(dialogService, stateManager, preferencesService, taskExecutor)),
factory.createMenuItem(StandardActions.OPEN_EXTERNAL_FILE, new OpenExternalFileAction(dialogService, stateManager, preferencesService, taskExecutor)),
+ factory.createMenuItem(StandardActions.EXTRACT_FILE_REFERENCES, new ExtractReferencesAction(dialogService, stateManager, preferencesService, taskExecutor)),
factory.createMenuItem(StandardActions.OPEN_URL, new OpenUrlAction(dialogService, stateManager, preferencesService)),
factory.createMenuItem(StandardActions.SEARCH_SHORTSCIENCE, new SearchShortScienceAction(dialogService, stateManager, preferencesService)),
diff --git a/src/main/java/org/jabref/logic/importer/util/GrobidService.java b/src/main/java/org/jabref/logic/importer/util/GrobidService.java
index 514e43d0513..f3449f68be0 100644
--- a/src/main/java/org/jabref/logic/importer/util/GrobidService.java
+++ b/src/main/java/org/jabref/logic/importer/util/GrobidService.java
@@ -3,6 +3,7 @@
import java.io.IOException;
+import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
+import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
@@ -84,6 +85,34 @@ public List processPDF(Path filePath, ImportFormatPreferences importFo
String httpResponse = response.body();
+ return getBibEntries(importFormatPreferences, httpResponse);
+ }
+
+    /**
+     * Extracts the references of several files by processing them one after another and
+     * concatenating the results in the order of {@code pathList}.
+     *
+     * @param pathList                the files to send to GROBID; an empty list yields an empty result
+     * @param importFormatPreferences preferences passed on to the single-file variant
+     * @return all entries extracted from the given files
+     * @throws IOException    propagated from processing a single file; processing stops at the first failure
+     * @throws ParseException propagated from processing a single file
+     */
+    public List<BibEntry> processReferences(List<Path> pathList, ImportFormatPreferences importFormatPreferences) throws IOException, ParseException {
+        List<BibEntry> entries = new ArrayList<>();
+        for (Path filePath : pathList) {
+            entries.addAll(processReferences(filePath, importFormatPreferences));
+        }
+
+        return entries;
+    }
+
+    /**
+     * Uploads one file to the {@code /api/processReferences} endpoint of the configured GROBID server
+     * and converts the BibTeX response into entries.
+     *
+     * @param filePath                the file to upload
+     * @param importFormatPreferences preferences used when converting the response into entries
+     * @return the entries parsed from the server response
+     * @throws IOException    if the file cannot be opened, the request fails, or the response is empty
+     * @throws ParseException propagated from converting the response into entries
+     */
+    public List<BibEntry> processReferences(Path filePath, ImportFormatPreferences importFormatPreferences) throws IOException, ParseException {
+        // Jsoup leaves closing an uploaded stream to the caller, so scope it to the request
+        try (InputStream pdfStream = Files.newInputStream(filePath)) {
+            Connection.Response response = Jsoup.connect(grobidPreferences.getGrobidURL() + "/api/processReferences")
+                    .header("Accept", MediaTypes.APPLICATION_BIBTEX)
+                    .data("input", filePath.toString(), pdfStream)
+                    .data("consolidateCitations", String.valueOf(ConsolidateCitations.WITH_METADATA))
+                    .method(Connection.Method.POST)
+                    .ignoreContentType(true)
+                    .timeout(20000)
+                    .execute();
+
+            String httpResponse = response.body();
+
+            return getBibEntries(importFormatPreferences, httpResponse);
+        }
+    }
+
+ private static List getBibEntries(ImportFormatPreferences importFormatPreferences, String httpResponse) throws IOException, ParseException {
if (httpResponse == null || "@misc{-1,\n author = {}\n}\n".equals(httpResponse)) { // This filters empty BibTeX entries
throw new IOException("The GROBID server response does not contain anything.");
}
diff --git a/src/main/resources/l10n/JabRef_en.properties b/src/main/resources/l10n/JabRef_en.properties
index 2b05bedc01a..9b6b0a31c45 100644
--- a/src/main/resources/l10n/JabRef_en.properties
+++ b/src/main/resources/l10n/JabRef_en.properties
@@ -311,6 +311,12 @@ Export\ preferences\ to\ file=Export preferences to file
Export\ to\ clipboard=Export to clipboard
Export\ to\ text\ file.=Export to text file.
+Extract\ references\ from\ file=Extract references from file
+Extract\ References=Extract References
+Processing\ PDF(s)=Processing PDF(s)
+Processing\ a\ large\ number\ of\ files=Processing a large number of files
+You\ are\ about\ to\ process\ %0\ files.\ Continue?=You are about to process %0 files. Continue?
+
Exporting\ %0=Exporting %0
Could\ not\ export\ file\ '%0'\ (reason\:\ %1)=Could not export file '%0' (reason: %1)
Unknown\ export\ format\ %0=Unknown export format %0
diff --git a/src/test/java/org/jabref/logic/importer/util/GrobidServiceTest.java b/src/test/java/org/jabref/logic/importer/util/GrobidServiceTest.java
index 9898ec9fba2..daca2f700c2 100644
--- a/src/test/java/org/jabref/logic/importer/util/GrobidServiceTest.java
+++ b/src/test/java/org/jabref/logic/importer/util/GrobidServiceTest.java
@@ -4,6 +4,7 @@
import java.net.URISyntaxException;
import java.nio.file.Path;
import java.util.List;
+import java.util.Objects;
import java.util.Optional;
import org.jabref.logic.importer.ImportFormatPreferences;
@@ -100,4 +101,22 @@ public void processPdfTest() throws IOException, ParseException, URISyntaxExcept
// assertEquals(Optional.of("Paper Title"), be0.getField(StandardField.TITLE));
// assertEquals(Optional.of("2014-10-05"), be0.getField(StandardField.DATE));
}
+
+    /**
+     * Verifies that processing {@code LNCS-minimal.pdf} yields exactly the one expected reference.
+     */
+    @Test
+    public void extractsReferencesFromPdf() throws IOException, ParseException, URISyntaxException {
+        BibEntry ref1 = new BibEntry(StandardEntryType.Article)
+                .withField(StandardField.AUTHOR, "Kopp, O")
+                .withField(StandardField.ADDRESS, "Berlin Heidelberg")
+                .withField(StandardField.DATE, "2013")
+                .withField(StandardField.JOURNAL, "All links were last followed on October")
+                .withField(StandardField.PAGES, "700--704")
+                .withField(StandardField.PUBLISHER, "Springer")
+                .withField(StandardField.TITLE, "Winery -A Modeling Tool for TOSCA-based Cloud Applications")
+                .withField(StandardField.VOLUME, "8274")
+                .withField(StandardField.YEAR, "2013");
+
+        // The fixture is resolved relative to PdfGrobidImporterTest; requireNonNull fails fast if it is missing
+        Path file = Path.of(Objects.requireNonNull(PdfGrobidImporterTest.class.getResource("LNCS-minimal.pdf")).toURI());
+        List<BibEntry> extractedReferences = grobidService.processReferences(file, importFormatPreferences);
+        assertEquals(List.of(ref1), extractedReferences);
+    }
}
diff --git a/src/test/resources/pdfs/IEEE/.gitignore b/src/test/resources/pdfs/IEEE/.gitignore
new file mode 100644
index 00000000000..0509c598856
--- /dev/null
+++ b/src/test/resources/pdfs/IEEE/.gitignore
@@ -0,0 +1,314 @@
+# Created by https://www.toptal.com/developers/gitignore/api/latex
+# Edit at https://www.toptal.com/developers/gitignore?templates=latex
+
+### LaTeX ###
+## Core latex/pdflatex auxiliary files:
+*.aux
+*.lof
+*.log
+*.lot
+*.fls
+*.out
+*.toc
+*.fmt
+*.fot
+*.cb
+*.cb2
+.*.lb
+
+## Intermediate documents:
+*.dvi
+*.xdv
+*-converted-to.*
+# these rules might exclude image files for figures etc.
+# *.ps
+# *.eps
+# *.pdf
+
+## Generated if empty string is given at "Please type another file name for output:"
+.pdf
+
+## Bibliography auxiliary files (bibtex/biblatex/biber):
+*.bbl
+*.bcf
+*.blg
+*-blx.aux
+*-blx.bib
+*.run.xml
+
+## Build tool auxiliary files:
+*.fdb_latexmk
+*.synctex
+*.synctex(busy)
+*.synctex.gz
+*.synctex.gz(busy)
+*.pdfsync
+
+## Build tool directories for auxiliary files
+# latexrun
+latex.out/
+
+## Auxiliary and intermediate files from other packages:
+# algorithms
+*.alg
+*.loa
+
+# achemso
+acs-*.bib
+
+# amsthm
+*.thm
+
+# beamer
+*.nav
+*.pre
+*.snm
+*.vrb
+
+# changes
+*.soc
+
+# comment
+*.cut
+
+# cprotect
+*.cpt
+
+# elsarticle (documentclass of Elsevier journals)
+*.spl
+
+# endnotes
+*.ent
+
+# fixme
+*.lox
+
+# feynmf/feynmp
+*.mf
+*.mp
+*.t[1-9]
+*.t[1-9][0-9]
+*.tfm
+
+#(r)(e)ledmac/(r)(e)ledpar
+*.end
+*.?end
+*.[1-9]
+*.[1-9][0-9]
+*.[1-9][0-9][0-9]
+*.[1-9]R
+*.[1-9][0-9]R
+*.[1-9][0-9][0-9]R
+*.eledsec[1-9]
+*.eledsec[1-9]R
+*.eledsec[1-9][0-9]
+*.eledsec[1-9][0-9]R
+*.eledsec[1-9][0-9][0-9]
+*.eledsec[1-9][0-9][0-9]R
+
+# glossaries
+*.acn
+*.acr
+*.glg
+*.glo
+*.gls
+*.glsdefs
+*.lzo
+*.lzs
+*.slg
+*.slo
+*.sls
+
+# uncomment this for glossaries-extra (will ignore makeindex's style files!)
+# *.ist
+
+# gnuplot
+*.gnuplot
+*.table
+
+# gnuplottex
+*-gnuplottex-*
+
+# gregoriotex
+*.gaux
+*.glog
+*.gtex
+
+# htlatex
+*.4ct
+*.4tc
+*.idv
+*.lg
+*.trc
+*.xref
+
+# hyperref
+*.brf
+
+# knitr
+*-concordance.tex
+# TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files
+# *.tikz
+*-tikzDictionary
+
+# listings
+*.lol
+
+# luatexja-ruby
+*.ltjruby
+
+# makeidx
+*.idx
+*.ilg
+*.ind
+
+# minitoc
+*.maf
+*.mlf
+*.mlt
+*.mtc[0-9]*
+*.slf[0-9]*
+*.slt[0-9]*
+*.stc[0-9]*
+
+# minted
+_minted*
+*.pyg
+
+# morewrites
+*.mw
+
+# newpax
+*.newpax
+
+# nomencl
+*.nlg
+*.nlo
+*.nls
+
+# pax
+*.pax
+
+# pdfpcnotes
+*.pdfpc
+
+# sagetex
+*.sagetex.sage
+*.sagetex.py
+*.sagetex.scmd
+
+# scrwfile
+*.wrt
+
+# svg
+svg-inkscape/
+
+# sympy
+*.sout
+*.sympy
+sympy-plots-for-*.tex/
+
+# pdfcomment
+*.upa
+*.upb
+
+# pythontex
+*.pytxcode
+pythontex-files-*/
+
+# tcolorbox
+*.listing
+
+# thmtools
+*.loe
+
+# TikZ & PGF
+*.dpth
+*.md5
+*.auxlock
+
+# titletoc
+*.ptc
+
+# todonotes
+*.tdo
+
+# vhistory
+*.hst
+*.ver
+
+# easy-todo
+*.lod
+
+# xcolor
+*.xcp
+
+# xmpincl
+*.xmpi
+
+# xindy
+*.xdy
+
+# xypic precompiled matrices and outlines
+*.xyc
+*.xyd
+
+# endfloat
+*.ttt
+*.fff
+
+# Latexian
+TSWLatexianTemp*
+
+## Editors:
+# WinEdt
+*.bak
+*.sav
+
+# Texpad
+.texpadtmp
+
+# LyX
+*.lyx~
+
+# Kile
+*.backup
+
+# gummi
+.*.swp
+
+# KBibTeX
+*~[0-9]*
+
+# TeXnicCenter
+*.tps
+
+# auto folder when using emacs and auctex
+./auto/*
+*.el
+
+# expex forward references with \gathertags
+*-tags.tex
+
+# standalone packages
+*.sta
+
+# Makeindex log files
+*.lpz
+
+# xwatermark package
+*.xwm
+
+# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib
+# option is specified. Footnotes are then stored in a file with suffix Notes.bib.
+# Uncomment the next line to have this generated file ignored.
+#*Notes.bib
+
+### LaTeX Patch ###
+# LIPIcs / OASIcs
+*.vtc
+
+# glossaries
+*.glstex
+
+# End of https://www.toptal.com/developers/gitignore/api/latex
diff --git a/src/test/resources/pdfs/IEEE/extract-references-test.bib b/src/test/resources/pdfs/IEEE/extract-references-test.bib
new file mode 100644
index 00000000000..eaddd5e3f66
--- /dev/null
+++ b/src/test/resources/pdfs/IEEE/extract-references-test.bib
@@ -0,0 +1,8 @@
+@Article{,
+ title = {JabRef Example for References Parsing},
+ file = {:ieee-paper.pdf:PDF},
+}
+
+@Comment{jabref-meta: databaseType:bibtex;}
+
+@Comment{jabref-meta: fileDirectory:.;}
diff --git a/src/test/resources/pdfs/IEEE/ieee-paper.bib b/src/test/resources/pdfs/IEEE/ieee-paper.bib
new file mode 100644
index 00000000000..38533080cc0
--- /dev/null
+++ b/src/test/resources/pdfs/IEEE/ieee-paper.bib
@@ -0,0 +1,51 @@
+@Article{Alver2007,
+ author = {Alver, Morten Omholt and Tenn{\o}y, Torodd and Alfredsen, Jo Arve and {\O}ie, Gunvor},
+ journal = {Aquacultural engineering},
+ title = {Automatic measurement of rotifer Brachionus plicatilis densities in first feeding tanks},
+ year = {2007},
+ number = {2},
+ pages = {115--121},
+ volume = {36},
+ publisher = {Elsevier},
+}
+
+@Article{Alver2007a,
+ author = {Alver, Morten Omholt and others},
+ journal = {Aquaculture},
+ title = {Estimating larval density in cod (Gadus morhua) first feeding tanks using measurements of feed density and larval growth rates},
+ year = {2007},
+ number = {1},
+ pages = {216--226},
+ volume = {268},
+ publisher = {Elsevier},
+}
+
+@InProceedings{Kopp2018,
+ author = {Kopp, Oliver and Armbruster, Anita and Zimmermann, Olaf},
+ booktitle = {ZEUS},
+ title = {Markdown Architectural Decision Records: Format and Tool Support},
+ year = {2018},
+ publisher = {CEUR-WS.org},
+}
+
+@InProceedings{Kopp2012,
+ author = {Oliver Kopp and others},
+ booktitle = {Business Process Model and Notation},
+ title = {{BPMN4TOSCA:} {A} Domain-Specific Language to Model Management Plans for Composite Applications},
+ year = {2012},
+ publisher = {Springer},
+ series = {LNCS},
+ volume = {125},
+ doi = {10.1007/978-3-642-33155-8_4},
+}
+
+@InProceedings{Koenig2023,
+ author = {Simone König and others},
+ booktitle = {INDIN},
+ title = {{BPMN4Cars}: A Car-Tailored Workflow Engine},
+ year = {2023},
+ publisher = {IEEE},
+ doi = {10.1109/indin51400.2023.10218082},
+}
+
+@Comment{jabref-meta: databaseType:bibtex;}
diff --git a/src/test/resources/pdfs/IEEE/ieee-paper.pdf b/src/test/resources/pdfs/IEEE/ieee-paper.pdf
new file mode 100644
index 00000000000..408d16f8254
Binary files /dev/null and b/src/test/resources/pdfs/IEEE/ieee-paper.pdf differ
diff --git a/src/test/resources/pdfs/IEEE/ieee-paper.tex b/src/test/resources/pdfs/IEEE/ieee-paper.tex
new file mode 100644
index 00000000000..62de1f28d8d
--- /dev/null
+++ b/src/test/resources/pdfs/IEEE/ieee-paper.tex
@@ -0,0 +1,39 @@
+\documentclass[conference,a4paper,english]{IEEEtran}[2015/08/26]
+
+\usepackage{lipsum}
+\usepackage{hyperref}
+\usepackage[keeplastbox]{flushend}
+
+\begin{document}
+\title{JabRef Example for Reference Parsing}
+\author{%
+ \IEEEauthorblockN{First Author}
+ \IEEEauthorblockA{University of Examples, Germany\\
+ \{lastname\}@example.org}
+}
+
+\maketitle
+
+\begin{abstract}
+\lipsum[1]
+\end{abstract}
+
+\section{Introduction}
+\lipsum[2]
+
+\section{Related Work}
+\lipsum[3]
+\cite{Alver2007,Alver2007a,Kopp2012,Kopp2018,Koenig2023}
+
+\section{Contribution}
+\lipsum[4-7]
+
+\section{Conclusion and Outlook}
+\lipsum[4]
+
+\atColsEnd{\vfil}
+
+\bibliographystyle{IEEEtran}
+\bibliography{IEEEabrv,ieee-paper}
+
+\end{document}