Skip to content

Commit

Permalink
CSL4LibreOffice - D [GSoC '24] (JabRef#11636)
Browse files Browse the repository at this point in the history
* Refactor CitationStyleGeneratorTest

* Add test: [StAX] Parse title, isNumericStyle

* Add CSLFormatUtils

* Refactor CSLCitationOOAdapter and add JavaDoc

* Add test for citeproc DIN 1505-2

* Better method names, more javadoc

* Add tests for CSLFormatUtils

* Add javadoc for adapter

* Fix locales

* Fix submodules for styles

* OpenRewrite

* Fix submodules for styles

* Fix locales

* Rename test method

* Change order of methods in mark manager

* Fix submodules for styles

* Fix locales

* Disable test

* Review actions - I

* Better javadoc for disabled test

* Remove "public" as per best practices

* Review changes [3]

* Swap arguments for CitationStyleTest

* Add comment to @disabled
  • Loading branch information
subhramit authored Aug 24, 2024
1 parent ba4bd2d commit f6ea6a9
Show file tree
Hide file tree
Showing 7 changed files with 1,068 additions and 200 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.jabref.logic.openoffice.style.OOStyle;
import org.jabref.logic.util.StandardFileType;

import com.google.common.annotations.VisibleForTesting;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -76,7 +77,8 @@ private static Optional<CitationStyle> createCitationStyleFromSource(final Input
/**
 * Title and numeric-style flag of a citation style; {@code parseStyleInfo} returns this type wrapped in an Optional.
 *
 * @param title          the title of the style (presumably the one declared in the style source; confirm in parseStyleInfo)
 * @param isNumericStyle whether the style is considered numeric (how this is decided is not visible here; confirm in parseStyleInfo)
 */
public record StyleInfo(String title, boolean isNumericStyle) {
}

private static Optional<StyleInfo> parseStyleInfo(String filename, String content) {
@VisibleForTesting
static Optional<StyleInfo> parseStyleInfo(String filename, String content) {
FACTORY.setProperty(XMLInputFactory.IS_COALESCING, true);

try {
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
package org.jabref.logic.openoffice.oocsltext;

import java.util.List;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jabref.logic.citationkeypattern.BracketedPattern;
import org.jabref.logic.citationstyle.CitationStyleOutputFormat;
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.entry.AuthorList;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.BibEntryTypesManager;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.openoffice.ootext.OOText;
import org.jabref.model.openoffice.ootext.OOTextIntoOO;

import com.sun.star.text.XTextCursor;
import com.sun.star.text.XTextDocument;
import org.apache.commons.text.StringEscapeUtils;

/**
* Contains utility constants and methods for processing of CSL citations as generated by methods of <a href="https://github.com/michel-kraemer/citeproc-java">citeproc-java</a> ({@link org.jabref.logic.citationstyle.CitationStyleGenerator}).
* <p>These methods are used in {@link CSLCitationOOAdapter} which inserts CSL citation text into an OO document.</p>
*/
public final class CSLFormatUtils {

    // TODO: These are static final fields right now, should add the functionality to let user select these and store them in preferences.
    public static final String DEFAULT_BIBLIOGRAPHY_TITLE = "References";
    public static final String DEFAULT_BIBLIOGRAPHY_HEADER_PARAGRAPH_FORMAT = "Heading 2";

    public static final CitationStyleOutputFormat OUTPUT_FORMAT = CitationStyleOutputFormat.HTML;

    // Splits a citation into: first character, text before the last ", <4 digits>", and the year with everything after it.
    private static final Pattern YEAR_IN_CITATION_PATTERN = Pattern.compile("(.)(.*), (\\d{4}.*)");

    // Optional opening bracket/parenthesis, the number, optional closing bracket/parenthesis, optional dot, trailing whitespace.
    private static final Pattern BIBLIOGRAPHY_NUMBER_PATTERN = Pattern.compile("(\\[|\\()?(\\d+)(\\]|\\))?(\\.)?\\s*");

    // Patterns used by transformHTML, compiled once instead of on every call.
    private static final Pattern MARGIN_AND_INLINE_DIVS = Pattern.compile("<div class=\"csl-left-margin\">(.*?)</div><div class=\"csl-right-inline\">(.*?)</div>");
    private static final Pattern OPENING_DIV = Pattern.compile("<div[^>]*>");
    private static final Pattern OPENING_ANCHOR = Pattern.compile("<a[^>]*>");
    private static final Pattern BOLD_SPAN = Pattern.compile("<span style=\"font-weight: ?bold;?\">(.*?)</span>");
    private static final Pattern ITALIC_SPAN = Pattern.compile("<span style=\"font-style: ?italic;?\">(.*?)</span>");
    private static final Pattern UNDERLINE_SPAN = Pattern.compile("<span style=\"text-decoration: ?underline;?\">(.*?)</span>");
    private static final Pattern SMALL_CAPS_SPAN = Pattern.compile("<span style=\"font-variant: ?small-caps;?\">(.*?)</span>");
    private static final Pattern ANY_SPAN_TAG = Pattern.compile("</?span[^>]*>");

    private CSLFormatUtils() {
        // Static utility class; not meant to be instantiated.
    }

    /**
     * Transforms provided HTML into a format that can be fully parsed and inserted into an OO document.
     * Context: The HTML produced by {@link org.jabref.logic.citationstyle.CitationStyleGenerator#generateCitation(List, String, CitationStyleOutputFormat, BibDatabaseContext, BibEntryTypesManager) generateCitation} or {@link org.jabref.logic.citationstyle.CitationStyleGenerator#generateInText(List, String, CitationStyleOutputFormat, BibDatabaseContext, BibEntryTypesManager) generateInText} is not directly (completely) parsable by {@link OOTextIntoOO#write(XTextDocument, XTextCursor, OOText) write}.
     * For more details, read the documentation for the {@link OOTextIntoOO} class.
     * <a href="https://devdocs.jabref.org/code-howtos/openoffice/code-reorganization.html">Additional Information</a>.
     * <p>
     * The steps are applied in this order: HTML entities are unescaped; a {@code csl-left-margin} div directly followed by a
     * {@code csl-right-inline} div is joined into "left right" (single space in between); all remaining {@code div} and
     * {@code a} tags are dropped (their content is kept); {@code span} tags carrying a bold, italic, underline or small-caps
     * inline style become {@code <b>}, {@code <i>}, {@code <u>} and {@code <smallcaps>}; any other {@code span} tag is dropped.
     * </p>
     *
     * @param html The HTML string to be transformed into OO-write ready HTML.
     * @return The formatted html string.
     */
    public static String transformHTML(String html) {
        // Initial clean up of escaped characters
        String result = StringEscapeUtils.unescapeHtml4(html);

        // Handle margins (spaces between citation number and text)
        result = MARGIN_AND_INLINE_DIVS.matcher(result).replaceAll("$1 $2");

        // Remove unsupported tags
        result = OPENING_DIV.matcher(result).replaceAll("");
        result = result.replace("</div>", "");

        // Remove unsupported links
        result = OPENING_ANCHOR.matcher(result).replaceAll("");
        result = result.replace("</a>", "");

        // Replace span tags carrying inline styles with their dedicated tags.
        // This has to happen before the generic span clean up below, which would otherwise discard the styling.
        result = BOLD_SPAN.matcher(result).replaceAll("<b>$1</b>");
        result = ITALIC_SPAN.matcher(result).replaceAll("<i>$1</i>");
        result = UNDERLINE_SPAN.matcher(result).replaceAll("<u>$1</u>");
        result = SMALL_CAPS_SPAN.matcher(result).replaceAll("<smallcaps>$1</smallcaps>");

        // Clean up any remaining span tags
        return ANY_SPAN_TAG.matcher(result).replaceAll("");
    }

    /**
     * Alphanumeric citations are not natively supported by citeproc-java (see {@link org.jabref.logic.citationstyle.CitationStyleGenerator#generateInText(List, String, CitationStyleOutputFormat, BibDatabaseContext, BibEntryTypesManager) generateInText}).
     * Thus, we manually format a citation to produce its alphanumeric form.
     * <p>
     * The result has the form {@code [key1; key2; ...]}. For an entry with both an author and a year, the key is the
     * author-alpha key followed by the last two characters of the year; otherwise the entry's citation key is used
     * (or the empty string if it has none).
     * </p>
     *
     * @param entries            the list of entries for which the alphanumeric citation is to be generated.
     * @param bibDatabaseContext the database context whose database is used to resolve the author and year fields.
     * @return the alphanumeric citation (for a single entry or a group of entries).
     */
    public static String generateAlphanumericCitation(List<BibEntry> entries, BibDatabaseContext bibDatabaseContext) {
        StringBuilder citation = new StringBuilder("[");
        for (int i = 0; i < entries.size(); i++) {
            if (i > 0) {
                citation.append("; ");
            }
            citation.append(alphanumericKey(entries.get(i), bibDatabaseContext));
        }
        return citation.append("]").toString();
    }

    /**
     * Builds the alphanumeric key of a single entry (without surrounding brackets).
     */
    private static String alphanumericKey(BibEntry entry, BibDatabaseContext bibDatabaseContext) {
        Optional<String> author = entry.getResolvedFieldOrAlias(StandardField.AUTHOR, bibDatabaseContext.getDatabase());
        Optional<String> year = entry.getResolvedFieldOrAlias(StandardField.YEAR, bibDatabaseContext.getDatabase());

        if (author.isEmpty() || year.isEmpty()) {
            return entry.getCitationKey().orElse("");
        }

        String alphaKey = BracketedPattern.authorsAlpha(AuthorList.parse(author.get()));

        // Keep only the last two characters of the year (the whole value if it is shorter than that)
        String fullYear = year.get();
        String shortYear = fullYear.length() >= 2 ? fullYear.substring(fullYear.length() - 2) : fullYear;

        return alphaKey + shortYear;
    }

    /**
     * Method to update citation number of a bibliographic entry (to be inserted in the list of references).
     * By default, citeproc-java ({@link org.jabref.logic.citationstyle.CitationStyleGenerator#generateCitation(List, String, CitationStyleOutputFormat, BibDatabaseContext, BibEntryTypesManager) generateCitation}) always starts the numbering of a list of citations with "1".
     * If a citation doesn't correspond to the first cited entry, the number should be changed to the relevant current citation number.
     * If an entry has been cited before, the older number should be reused.
     * The number can be enclosed in different formats, such as "1", "1.", "1)", "(1)" or "[1]".
     * <p>
     * Only the first number found in the citation is replaced; its enclosing characters and a directly following dot are kept.
     * Whitespace directly following the number is replaced by a single space if it ends with a space character and is removed otherwise.
     * A citation that contains no number is returned unchanged.
     * </p>
     * <p>
     * <b>Precondition:</b> Use ONLY with numeric citation styles.</p>
     *
     * @param citation      the numeric citation with an unresolved number.
     * @param currentNumber the correct number to update the citation with.
     * @return the bibliographic citation with resolved number.
     */
    public static String updateSingleBibliographyNumber(String citation, int currentNumber) {
        Matcher matcher = BIBLIOGRAPHY_NUMBER_PATTERN.matcher(citation);
        if (!matcher.find()) {
            return citation;
        }

        String prefix = matcher.group(1) != null ? matcher.group(1) : "";
        String suffix = matcher.group(3) != null ? matcher.group(3) : "";
        String dot = matcher.group(4) != null ? "." : "";
        String space = matcher.group().endsWith(" ") ? " " : "";

        // Everything before and after the first match is copied verbatim
        return citation.substring(0, matcher.start())
                + prefix + currentNumber + suffix + dot + space
                + citation.substring(matcher.end());
    }

    /**
     * Extracts year from a citation having single or multiple entries, for the purpose of using in in-text citations.
     * <p>
     * The citation is split at the last {@code ", "} that is followed by four digits: the text before it (without the
     * citation's first character) is placed in front, followed by a space, the first character, and the year part.
     * For example, {@code (Smith, 2016)} becomes {@code Smith (2016)}.
     * </p>
     *
     * @param formattedCitation the citation cleaned up and formatted using {@link CSLFormatUtils#transformHTML transformHTML}.
     * @return the rearranged in-text citation, or {@code formattedCitation} unchanged if it contains no ", " followed by four digits.
     */
    public static String changeToInText(String formattedCitation) {
        Matcher matcher = YEAR_IN_CITATION_PATTERN.matcher(formattedCitation);
        if (!matcher.find()) {
            return formattedCitation;
        }
        return matcher.group(2) + " " + matcher.group(1) + matcher.group(3);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,21 @@ public CSLReferenceMarkManager(XTextDocument document) {
this.citationKeyToNumber = new HashMap<>();
}

/**
 * Creates a reference mark for the given entry, registers it via {@link #addMark(CSLReferenceMark)} and returns it.
 * <p>
 * The mark is built from the entry's citation key and the citation number obtained from
 * {@link #getCitationNumber(String)}. If the entry has no citation key, a random identifier is used in its place.
 * </p>
 *
 * @param entry the entry to create a reference mark for
 * @return the newly created and registered reference mark
 * @throws Exception if the reference mark could not be created
 */
public CSLReferenceMark createReferenceMark(BibEntry entry) throws Exception {
    // orElseGet: only generate the random fallback identifier when the entry really lacks a citation key
    String citationKey = entry.getCitationKey().orElseGet(() -> CUID.randomCUID2(8).toString());
    int citationNumber = getCitationNumber(citationKey);
    CSLReferenceMark referenceMark = CSLReferenceMark.of(citationKey, citationNumber, factory);
    addMark(referenceMark);
    return referenceMark;
}

/**
 * Registers a reference mark with this manager.
 * <p>
 * The mark is indexed by its name, assigned an ID equal to its position in the list of known marks,
 * appended to that list, and finally the citation information is updated from the mark's name.
 * </p>
 *
 * @param mark the reference mark to register
 */
public void addMark(CSLReferenceMark mark) {
    final String markName = mark.getName();
    // The ID has to be taken before the mark is appended, so it equals the mark's index in marksByID
    final int nextId = marksByID.size();

    marksByName.put(markName, mark);
    idsByMark.put(mark, nextId);
    marksByID.add(mark);

    updateCitationInfo(markName);
}

public void readExistingMarks() throws WrappedTargetException, NoSuchElementException {
XReferenceMarksSupplier supplier = UnoRuntime.queryInterface(XReferenceMarksSupplier.class, document);
XNameAccess marks = supplier.getReferenceMarks();
Expand Down Expand Up @@ -72,26 +87,11 @@ private void updateCitationInfo(String name) {
}
}

public void addMark(CSLReferenceMark mark) {
marksByName.put(mark.getName(), mark);
idsByMark.put(mark, marksByID.size());
marksByID.add(mark);
updateCitationInfo(mark.getName());
public boolean hasCitationForKey(String citationKey) {
return citationKeyToNumber.containsKey(citationKey);
}

/**
 * Returns the citation number associated with the given citation key.
 * A key that has no number yet is assigned the next number (one above the highest number handed out so far),
 * and that assignment is remembered for later calls.
 *
 * @param citationKey the citation key to look up
 * @return the number already associated with the key, or the newly assigned one
 */
public int getCitationNumber(String citationKey) {
    Integer number = citationKeyToNumber.computeIfAbsent(citationKey, unseenKey -> {
        highestCitationNumber += 1;
        return highestCitationNumber;
    });
    return number;
}

public CSLReferenceMark createReferenceMark(BibEntry entry) throws Exception {
String citationKey = entry.getCitationKey().orElse(CUID.randomCUID2(8).toString());
int citationNumber = getCitationNumber(citationKey);
CSLReferenceMark referenceMark = CSLReferenceMark.of(citationKey, citationNumber, factory);
addMark(referenceMark);
return referenceMark;
}

public boolean hasCitationForKey(String citationKey) {
return citationKeyToNumber.containsKey(citationKey);
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.jabref.logic.citationstyle;

import java.io.IOException;
import java.util.List;
import java.util.stream.Stream;

Expand All @@ -13,6 +14,8 @@
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.types.StandardEntryType;

import de.undercouch.citeproc.output.Citation;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
Expand All @@ -22,15 +25,16 @@

class CitationStyleGeneratorTest {

private final BibEntry testEntry = TestEntry.getTestEntry();
private final BibDatabaseContext context = new BibDatabaseContext(new BibDatabase(List.of(testEntry)));
private final BibEntryTypesManager bibEntryTypesManager = new BibEntryTypesManager();
private final List<CitationStyle> styleList = CitationStyle.discoverCitationStyles();

@Test
void aCMCitation() {
BibDatabaseContext context = new BibDatabaseContext(new BibDatabase(List.of(TestEntry.getTestEntry())));
context.setMode(BibDatabaseMode.BIBLATEX);
List<CitationStyle> styleList = CitationStyle.discoverCitationStyles();
CitationStyle style = styleList.stream().filter(e -> "ACM SIGGRAPH".equals(e.getTitle())).findAny().orElse(null);
String citation = CitationStyleGenerator.generateCitation(List.of(TestEntry.getTestEntry()), style.getSource(), CitationStyleOutputFormat.HTML, context, new BibEntryTypesManager()).getFirst();
CitationStyle style = styleList.stream().filter(e -> "ACM SIGGRAPH".equals(e.getTitle())).findAny().get();
String citation = CitationStyleGenerator.generateCitation(List.of(testEntry), style.getSource(), CitationStyleOutputFormat.HTML, context, bibEntryTypesManager).getFirst();

// if the acm-siggraph.csl citation style changes this has to be modified
String expected = " <div class=\"csl-entry\">"
Expand All @@ -43,11 +47,9 @@ void aCMCitation() {

@Test
void aPACitation() {
BibDatabaseContext context = new BibDatabaseContext(new BibDatabase(List.of(TestEntry.getTestEntry())));
context.setMode(BibDatabaseMode.BIBLATEX);
List<CitationStyle> styleList = CitationStyle.discoverCitationStyles();
CitationStyle style = styleList.stream().filter(e -> "American Psychological Association 7th edition".equals(e.getTitle())).findAny().orElse(null);
String citation = CitationStyleGenerator.generateCitation(List.of(TestEntry.getTestEntry()), style.getSource(), CitationStyleOutputFormat.HTML, context, new BibEntryTypesManager()).getFirst();
CitationStyle style = styleList.stream().filter(e -> "American Psychological Association 7th edition".equals(e.getTitle())).findAny().get();
String citation = CitationStyleGenerator.generateCitation(List.of(testEntry), style.getSource(), CitationStyleOutputFormat.HTML, context, bibEntryTypesManager).getFirst();

// if the apa-7th-citation.csl citation style changes this has to be modified
String expected = " <div class=\"csl-entry\">"
Expand All @@ -58,6 +60,21 @@ void aPACitation() {
assertEquals(expected, citation);
}

/**
 * Checks the in-text citation produced for the alphanumeric DIN 1505-2 style.
 * <p>
 * Disabled: citeproc-java ({@link CitationStyleGenerator#generateInText(List, String, CitationStyleOutputFormat, BibDatabaseContext, BibEntryTypesManager) generateInText}) returns an empty citation for this style.
 * Until that changes, alphanumeric citations are generated manually (see {@link org.jabref.logic.openoffice.oocsltext.CSLFormatUtils#generateAlphanumericCitation(List, BibDatabaseContext) generateAlphanumericCitation}).
 * </p>
 */
@Test
@Disabled("Till alphanumeric citations are supported by citeproc-java")
void din1502AlphanumericInTextCitation() throws IOException {
    context.setMode(BibDatabaseMode.BIBLATEX);

    String styleSource = styleList.stream()
            .filter(candidate -> "DIN 1505-2 (alphanumeric, Deutsch) - standard superseded by ISO-690".equals(candidate.getTitle()))
            .findAny()
            .orElseThrow()
            .getSource();

    Citation citation = CitationStyleGenerator.generateInText(List.of(testEntry), styleSource, CitationStyleOutputFormat.HTML, context, bibEntryTypesManager);

    assertEquals("[Smit2016]", citation.getText());
}

@Test
void ignoreNewLine() {
BibEntry entry = new BibEntry();
Expand Down Expand Up @@ -97,23 +114,21 @@ void htmlFormat() {
" <div class=\"csl-left-margin\">[1]</div><div class=\"csl-right-inline\">B. Smith, B. Jones, and J. Williams, &ldquo;Title of the test entry,&rdquo; <span style=\"font-style: italic\">BibTeX Journal</span>, vol. 34, no. 3, pp. 45&ndash;67, Jul. 2016, doi: 10.1001/bla.blubb.</div>\n" +
" </div>\n";

BibEntry entry = TestEntry.getTestEntry();
String style = CitationStyle.getDefault().getSource();
CitationStyleOutputFormat format = CitationStyleOutputFormat.HTML;

String actualCitation = CitationStyleGenerator.generateCitation(List.of(entry), style, format, new BibDatabaseContext(), bibEntryTypesManager).getFirst();
String actualCitation = CitationStyleGenerator.generateCitation(List.of(testEntry), style, format, context, bibEntryTypesManager).getFirst();
assertEquals(expectedCitation, actualCitation);
}

@Test
void textFormat() {
String expectedCitation = "[1]B. Smith, B. Jones, and J. Williams, “Title of the test entry,” BibTeX Journal, vol. 34, no. 3, pp. 45–67, Jul. 2016, doi: 10.1001/bla.blubb.\n";

BibEntry entry = TestEntry.getTestEntry();
String style = CitationStyle.getDefault().getSource();
CitationStyleOutputFormat format = CitationStyleOutputFormat.TEXT;

String actualCitation = CitationStyleGenerator.generateCitation(List.of(entry), style, format, new BibDatabaseContext(new BibDatabase(List.of(entry))), bibEntryTypesManager).getFirst();
String actualCitation = CitationStyleGenerator.generateCitation(List.of(testEntry), style, format, context, bibEntryTypesManager).getFirst();
assertEquals(expectedCitation, actualCitation);
}

Expand All @@ -134,12 +149,11 @@ void handleDiacritics() {
@Test
void handleAmpersand() {
String expectedCitation = "[1]B. Smith, B. Jones, and J. Williams, “Famous quote: “&TitleTest&” - that is it,” BibTeX Journal, vol. 34, no. 3, pp. 45–67, Jul. 2016, doi: 10.1001/bla.blubb.\n";
BibEntry entry = TestEntry.getTestEntry();
entry.setField(StandardField.TITLE, "Famous quote: “&TitleTest&” - that is it");
testEntry.setField(StandardField.TITLE, "Famous quote: “&TitleTest&” - that is it");
String style = CitationStyle.getDefault().getSource();
CitationStyleOutputFormat format = CitationStyleOutputFormat.TEXT;

String actualCitation = CitationStyleGenerator.generateCitation(List.of(entry), style, format, new BibDatabaseContext(), bibEntryTypesManager).getFirst();
String actualCitation = CitationStyleGenerator.generateCitation(List.of(testEntry), style, format, context, bibEntryTypesManager).getFirst();
assertEquals(expectedCitation, actualCitation);
}

Expand Down Expand Up @@ -208,7 +222,7 @@ static Stream<Arguments> cslMapping() {
.withField(StandardField.ISSUE, "7")
.withField(StandardField.EID, "e0270533"),
"ieee.csl"),
Arguments.of(
Arguments.of(
"[1]F. Last and J. Doe, no. 33, pp. 7–8.\n",
BibDatabaseMode.BIBLATEX,
new BibEntry(StandardEntryType.Article)
Expand Down Expand Up @@ -574,15 +588,14 @@ static Stream<Arguments> cslMapping() {

@ParameterizedTest
@MethodSource
void cslMapping(String expected, BibDatabaseMode mode, BibEntry entry, String cslFileName) throws Exception {
BibDatabaseContext bibDatabaseContext = new BibDatabaseContext(new BibDatabase(List.of(entry)));
bibDatabaseContext.setMode(mode);
void cslMapping(String expected, BibDatabaseMode mode, BibEntry entry, String cslFileName) {
context.setMode(mode);

String citation = CitationStyleGenerator.generateCitation(
List.of(entry),
CitationStyle.createCitationStyleFromFile(cslFileName).orElseThrow().getSource(),
CitationStyleOutputFormat.TEXT,
bibDatabaseContext,
context,
bibEntryTypesManager).getFirst();
assertEquals(expected, citation);
}
Expand Down
Loading

0 comments on commit f6ea6a9

Please sign in to comment.