diff --git a/CHANGELOG.md b/CHANGELOG.md
index 55d8ccfcdb5..98c21e0915c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
 
 - We added a dropdown menu to let users change the reference library during AUX file import. [#10472](https://github.com/JabRef/jabref/issues/10472)
 - We added a button to let users reset the cite command to the default value. [#10569](https://github.com/JabRef/jabref/issues/10569)
+- We added [scholar.archive.org](https://scholar.archive.org/) as a new fetcher. [#10498](https://github.com/JabRef/jabref/issues/10498)
 
 ### Changed
 
diff --git a/src/main/java/org/jabref/logic/importer/WebFetchers.java b/src/main/java/org/jabref/logic/importer/WebFetchers.java
index 344fa6399b3..332811085fa 100644
--- a/src/main/java/org/jabref/logic/importer/WebFetchers.java
+++ b/src/main/java/org/jabref/logic/importer/WebFetchers.java
@@ -37,6 +37,7 @@
 import org.jabref.logic.importer.fetcher.OpenAccessDoi;
 import org.jabref.logic.importer.fetcher.ResearchGate;
 import org.jabref.logic.importer.fetcher.RfcFetcher;
+import org.jabref.logic.importer.fetcher.ScholarArchiveFetcher;
 import org.jabref.logic.importer.fetcher.ScienceDirect;
 import org.jabref.logic.importer.fetcher.SemanticScholar;
 import org.jabref.logic.importer.fetcher.SpringerFetcher;
@@ -126,6 +127,7 @@ public static SortedSet getSearchBasedFetchers(ImportFormatP
         set.add(new ResearchGate(importFormatPreferences));
         set.add(new BiodiversityLibrary(importerPreferences));
         set.add(new LOBIDFetcher(importerPreferences));
+        set.add(new ScholarArchiveFetcher());
         return set;
     }
 
diff --git a/src/main/java/org/jabref/logic/importer/fetcher/ScholarArchiveFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/ScholarArchiveFetcher.java
new file mode 100644
index 00000000000..0f6b810c6ac
--- /dev/null
+++ b/src/main/java/org/jabref/logic/importer/fetcher/ScholarArchiveFetcher.java
@@ -0,0 +1,182 @@
+package org.jabref.logic.importer.fetcher;
+
+import java.net.MalformedURLException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Optional;
+import java.util.stream.IntStream;
+
+import org.jabref.logic.importer.FetcherException;
+import org.jabref.logic.importer.PagedSearchBasedParserFetcher;
+import org.jabref.logic.importer.ParseException;
+import org.jabref.logic.importer.Parser;
+import org.jabref.logic.importer.fetcher.transformers.ScholarArchiveQueryTransformer;
+import org.jabref.logic.importer.util.JsonReader;
+import org.jabref.logic.net.URLDownload;
+import org.jabref.model.entry.AuthorList;
+import org.jabref.model.entry.BibEntry;
+import org.jabref.model.entry.field.StandardField;
+import org.jabref.model.entry.types.EntryType;
+import org.jabref.model.entry.types.StandardEntryType;
+
+import jakarta.ws.rs.core.MediaType;
+import kong.unirest.json.JSONArray;
+import kong.unirest.json.JSONException;
+import kong.unirest.json.JSONObject;
+import org.apache.http.client.utils.URIBuilder;
+import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Search-based fetcher for https://scholar.archive.org/ that requests results as JSON, page by page.
+ * Each hit of the response is converted into a {@link BibEntry}.
+ */
+public class ScholarArchiveFetcher implements PagedSearchBasedParserFetcher {
+
+    public static final String FETCHER_NAME = "ScholarArchive";
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(ScholarArchiveFetcher.class);
+
+    private static final String API_URL = "https://scholar.archive.org/search";
+
+    /**
+     * Builds the search URL for the given query and page.
+     *
+     * @param luceneQuery the search query
+     * @param pageNumber  the number of the page indexed from 0
+     * @return URL requesting {@code getPageSize()} hits starting at offset {@code getPageSize() * pageNumber}
+     */
+    @Override
+    public URL getURLForQuery(QueryNode luceneQuery, int pageNumber) throws URISyntaxException, MalformedURLException, FetcherException {
+        URIBuilder uriBuilder = new URIBuilder(API_URL);
+        uriBuilder.addParameter("q", new ScholarArchiveQueryTransformer().transformLuceneQuery(luceneQuery).orElse(""));
+        uriBuilder.addParameter("from", String.valueOf(getPageSize() * pageNumber));
+        uriBuilder.addParameter("size", String.valueOf(getPageSize()));
+        uriBuilder.addParameter("format", "json");
+
+        // Build the URL once and use the same object for logging and as return value
+        URL url = uriBuilder.build().toURL();
+        LOGGER.debug("using URL for search {}", url);
+        return url;
+    }
+
+    /**
+     * Creates the download for the given URL and adds an "Accept" header asking for JSON.
+     */
+    @Override
+    public URLDownload getUrlDownload(URL url) {
+        URLDownload download = new URLDownload(url);
+        download.addHeader("Accept", MediaType.APPLICATION_JSON);
+        return download;
+    }
+
+    /**
+     * Returns the parser converting the JSON response of the search API into entries.
+     * A response without a "results" key yields an empty list.
+     *
+     * @return Parser producing one BibEntry per element of "results"
+     */
+    @Override
+    public Parser getParser() {
+        return inputStream -> {
+            JSONObject response = JsonReader.toJsonObject(inputStream);
+            List<BibEntry> entries = new ArrayList<>();
+            if (response.has("results")) {
+                JSONArray results = response.getJSONArray("results");
+                for (int i = 0; i < results.length(); i++) {
+                    entries.add(parseJSONtoBibtex(results.getJSONObject(i)));
+                }
+            }
+
+            return entries;
+        };
+    }
+
+    @Override
+    public String getName() {
+        return FETCHER_NAME;
+    }
+
+    /**
+     * Converts a single hit of the search response into a BibEntry.
+     *
+     * @param jsonEntry one element of the "results" array
+     * @return the converted entry
+     * @throws ParseException if the hit lacks "biblio" or "abstracts" or has otherwise unexpected JSON types
+     */
+    private BibEntry parseJSONtoBibtex(JSONObject jsonEntry) throws ParseException {
+        try {
+            BibEntry entry = new BibEntry();
+            // getJSONObject instead of optJSONObject: a hit without "biblio" has to end up as ParseException, not as NullPointerException
+            JSONObject biblio = jsonEntry.getJSONObject("biblio");
+
+            JSONArray abstracts = jsonEntry.getJSONArray("abstracts");
+            // Only the first abstract is used
+            String foundAbstract = IntStream.range(0, abstracts.length())
+                                            .mapToObj(abstracts::getJSONObject)
+                                            .map(object -> object.optString("body"))
+                                            .findFirst().orElse("");
+
+            String url = Optional.ofNullable(jsonEntry.optJSONObject("fulltext")).map(fullText -> fullText.optString("access_url")).orElse("");
+
+            // publication type
+            String type = biblio.optString("release_type");
+            entry.setField(StandardField.TYPE, type);
+            // Locale.ROOT keeps the keyword matching independent of the default locale of the JVM
+            String normalizedType = type.toLowerCase(Locale.ROOT);
+            EntryType entryType = StandardEntryType.InCollection;
+            if (normalizedType.contains("book")) {
+                entryType = StandardEntryType.Book;
+            } else if (normalizedType.contains("article")) {
+                entryType = StandardEntryType.Article;
+            }
+            entry.setType(entryType);
+
+            entry.setField(StandardField.TITLE, biblio.optString("title"));
+            entry.setField(StandardField.JOURNAL, biblio.optString("container_name"));
+            entry.setField(StandardField.DOI, biblio.optString("doi"));
+            entry.setField(StandardField.ISSUE, biblio.optString("issue"));
+            entry.setField(StandardField.LANGUAGE, biblio.optString("lang_code"));
+            entry.setField(StandardField.PUBLISHER, biblio.optString("publisher"));
+
+            // optInt yields 0 for a missing value; writing that out would store a bogus "0" as year or volume
+            if (!biblio.isNull("release_year")) {
+                entry.setField(StandardField.YEAR, String.valueOf(biblio.optInt("release_year")));
+            }
+            if (!biblio.isNull("volume_int")) {
+                entry.setField(StandardField.VOLUME, String.valueOf(biblio.optInt("volume_int")));
+            }
+            entry.setField(StandardField.ABSTRACT, foundAbstract);
+            entry.setField(StandardField.URL, url);
+            entry.setField(StandardField.DATE, biblio.optString("date"));
+
+            // Authors are in contrib_names
+            if (biblio.has("contrib_names")) {
+                AuthorList parsedAuthors = AuthorList.parse(String.join(" and ", toStringList(biblio.getJSONArray("contrib_names"))));
+                entry.setField(StandardField.AUTHOR, parsedAuthors.getAsLastFirstNamesWithAnd(false));
+            }
+
+            if (biblio.has("issns")) {
+                entry.setField(StandardField.ISSN, String.join(" ", toStringList(biblio.getJSONArray("issns"))));
+            }
+            return entry;
+        } catch (JSONException exception) {
+            throw new ParseException("ScholarArchive API JSON format has changed", exception);
+        }
+    }
+
+    /**
+     * Copies the string elements of the given JSON array into a list, keeping their order.
+     */
+    private static List<String> toStringList(JSONArray array) {
+        List<String> values = new ArrayList<>(array.length());
+        for (int i = 0; i < array.length(); i++) {
+            values.add(array.getString(i));
+        }
+        return values;
+    }
+}
diff --git a/src/main/java/org/jabref/logic/importer/fetcher/transformers/ScholarArchiveQueryTransformer.java b/src/main/java/org/jabref/logic/importer/fetcher/transformers/ScholarArchiveQueryTransformer.java
new file mode 100644
index 00000000000..18f84521901
--- /dev/null
+++ b/src/main/java/org/jabref/logic/importer/fetcher/transformers/ScholarArchiveQueryTransformer.java
@@ -0,0 +1,68 @@
+package org.jabref.logic.importer.fetcher.transformers;
+
+/**
+ * This class extends the AbstractQueryTransformer to provide specific implementations
+ * for transforming standard queries into ones suitable for the Scholar Archive's unique format.
+ */
+public class ScholarArchiveQueryTransformer extends AbstractQueryTransformer {
+
+    @Override
+    protected String getLogicalAndOperator() {
+        return " AND ";
+    }
+
+    @Override
+    protected String getLogicalOrOperator() {
+        return " OR ";
+    }
+
+    @Override
+    protected String getLogicalNotOperator() {
+        return "NOT ";
+    }
+
+    @Override
+    protected String handleAuthor(String author) {
+        return createKeyValuePair("contrib_names", author);
+    }
+
+    @Override
+    protected String handleTitle(String title) {
+        return createKeyValuePair("title", title);
+    }
+
+    @Override
+    protected String handleJournal(String journalTitle) {
+        return createKeyValuePair("container_name", journalTitle);
+    }
+
+    /**
+     * Handles the year query by formatting it as a range whose start and end are both the given year.
+     *
+     * NOTE(review): nothing in this change shows that the search API knows the field "publication.startDate" - confirm against the API.
+     *
+     * @param year the publication year to be searched in the Scholar Archive.
+     * @return A string query segment formatted for the year search.
+     */
+    @Override
+    protected String handleYear(String year) {
+        return "publication.startDate:[" + year + " TO " + year + "]";
+    }
+
+    /**
+     * Handles a year range query, transforming it for the Scholar Archive's query format.
+     * If parsing leaves the end year at {@code Integer.MAX_VALUE}, the given text is returned unchanged.
+     *
+     * @param yearRange the range of years to be searched in the Scholar Archive, usually in the format "startYear-endYear".
+     * @return A string query segment formatted for the year range search, or {@code yearRange} itself (see above).
+     */
+    @Override
+    protected String handleYearRange(String yearRange) {
+        parseYearRange(yearRange);
+        if (endYear == Integer.MAX_VALUE) {
+            // Presumably no usable end year could be parsed (parseYearRange is inherited, verify there); pass the text through as is.
+            return yearRange;
+        }
+        return "publication.startDate:[" + startYear + " TO " + endYear + "]";
+    }
+}
diff --git a/src/test/java/org/jabref/gui/slr/ManageStudyDefinitionViewModelTest.java b/src/test/java/org/jabref/gui/slr/ManageStudyDefinitionViewModelTest.java
index eac16dd0c3f..272a045746c 100644
--- a/src/test/java/org/jabref/gui/slr/ManageStudyDefinitionViewModelTest.java
+++ b/src/test/java/org/jabref/gui/slr/ManageStudyDefinitionViewModelTest.java
@@ -51,6 +51,7 @@ public void emptyStudyConstructorFillsDatabasesCorrectly() {
                 new StudyCatalogItem("Medline/PubMed", false),
                 new StudyCatalogItem("ResearchGate", false),
                 new StudyCatalogItem("SAO/NASA ADS", false),
+                new StudyCatalogItem("ScholarArchive", false),
                 new StudyCatalogItem("SemanticScholar", false),
                 new StudyCatalogItem("Springer", true),
                 new StudyCatalogItem("zbMATH", false)
@@ -59,21 +60,7 @@ public void emptyStudyConstructorFillsDatabasesCorrectly() {
 
     @Test
     public void studyConstructorFillsDatabasesCorrectly(@TempDir Path tempDir) {
-        List<StudyDatabase> databases = List.of(
-                new StudyDatabase("ACM Portal", true));
-        Study study = new Study(
-                List.of("Name"),
-                "title",
-                List.of("Q1"),
-                List.of(),
-                databases
-        );
-        ManageStudyDefinitionViewModel manageStudyDefinitionViewModel = new ManageStudyDefinitionViewModel(
-                study,
-                tempDir,
-                importFormatPreferences,
-                importerPreferences,
-                dialogService);
+        ManageStudyDefinitionViewModel manageStudyDefinitionViewModel = getManageStudyDefinitionViewModel(tempDir);
         assertEquals(List.of(
                 new StudyCatalogItem("ACM Portal", true),
                 new StudyCatalogItem("ArXiv", false),
@@ -92,9 +79,28 @@ public void studyConstructorFillsDatabasesCorrectly(@TempDir Path tempDir) {
                 new StudyCatalogItem("Medline/PubMed", false),
                 new StudyCatalogItem("ResearchGate", false),
                 new StudyCatalogItem("SAO/NASA ADS", false),
+                new StudyCatalogItem("ScholarArchive", false),
                 new StudyCatalogItem("SemanticScholar", false),
                 new StudyCatalogItem("Springer", false),
                 new StudyCatalogItem("zbMATH", false)
         ), manageStudyDefinitionViewModel.getCatalogs());
     }
+
+    private ManageStudyDefinitionViewModel getManageStudyDefinitionViewModel(Path tempDir) {
+        List<StudyDatabase> databases = List.of(
+                new StudyDatabase("ACM Portal", true));
+        Study study = new Study(
+                List.of("Name"),
+                "title",
+                List.of("Q1"),
+                List.of(),
+                databases
+        );
+        return new ManageStudyDefinitionViewModel(
+                study,
+                tempDir,
+                importFormatPreferences,
+                importerPreferences,
+                dialogService);
+    }
 }
diff --git a/src/test/java/org/jabref/logic/importer/fetcher/ScholarArchiveFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/ScholarArchiveFetcherTest.java
new file mode 100644
index 00000000000..0af2c4f0eb4
--- /dev/null
+++ b/src/test/java/org/jabref/logic/importer/fetcher/ScholarArchiveFetcherTest.java
@@ -0,0 +1,47 @@
+package org.jabref.logic.importer.fetcher;
+
+import java.util.List;
+
+import org.jabref.logic.importer.FetcherException;
+import org.jabref.model.entry.BibEntry;
+import org.jabref.model.entry.field.StandardField;
+import org.jabref.model.entry.types.StandardEntryType;
+import org.jabref.testutils.category.FetcherTest;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+@FetcherTest
+public class ScholarArchiveFetcherTest {
+    private ScholarArchiveFetcher fetcher;
+    private BibEntry bibEntry;
+
+    @BeforeEach
+    public void setUp() {
+        fetcher = new ScholarArchiveFetcher();
+        bibEntry = new BibEntry(StandardEntryType.InCollection)
+                .withField(StandardField.TITLE, "Query expansion using associated queries")
+                .withField(StandardField.AUTHOR, "Billerbeck, Bodo and Scholer, Falk and Williams, Hugh E. and Zobel, Justin")
+                .withField(StandardField.DOI, "10.1145/956863.956866")
+                .withField(StandardField.JOURNAL, "Proceedings of the twelfth international conference on Information and knowledge management - CIKM '03")
+                .withField(StandardField.PUBLISHER, "ACM Press")
+                .withField(StandardField.TYPE, "paper-conference")
+                .withField(StandardField.YEAR, "2003")
+                .withField(StandardField.URL, "https://web.archive.org/web/20170810164449/http://goanna.cs.rmit.edu.au/~jz/fulltext/cikm03.pdf");
+    }
+
+    @Test
+    public void getNameReturnsCorrectName() {
+        assertEquals("ScholarArchive", fetcher.getName());
+    }
+
+    @Test
+    public void performSearchReturnsExpectedResults() throws FetcherException {
+        List<BibEntry> fetchedEntries = fetcher.performSearch("query");
+        fetchedEntries.forEach(entry -> entry.clearField(StandardField.ABSTRACT));
+        assertTrue(fetchedEntries.contains(bibEntry), "Found the following entries " + fetchedEntries);
+    }
+}