Skip to content

Commit

Permalink
First draft. Currently fails, because the CSV files have issues.
Browse files Browse the repository at this point in the history
  • Loading branch information
koppor committed Sep 2, 2023
1 parent 46a1c74 commit 091844b
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 9 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/refresh-journal-lists.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
java-version: 20
distribution: 'temurin'
cache: 'gradle'
- name: Update journal lists
- name: Check if journal-list.mv can be generated
run: |
./gradlew generateJournalAbbreviationList
- uses: peter-evans/create-pull-request@v5
Expand Down
16 changes: 9 additions & 7 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -291,16 +291,23 @@ tasks.register("generateBstGrammarSource", JavaExec) {
}

// Runs the ANTLR4 tool to generate the Java sources for the Search.g4 grammar.
tasks.register("generateSearchGrammarSource", JavaExec) {
    group = 'JabRef'
    description = "Generates java files for Search.g antlr4."
    // Each property is assigned exactly once; the earlier duplicate assignments of
    // `main` and `classpath` carried the same values and were redundant.
    classpath = configurations.antlr4
    main = "org.antlr.v4.Tool"

    // Declared inputs/outputs let Gradle skip the task when the grammar directory is unchanged.
    inputs.dir("src/main/antlr4/org/jabref/search/")
    outputs.dir("src-gen/main/java/org/jabref/search/")
    // Generate a visitor (no listener) into package org.jabref.search under src-gen.
    args = ["-o","src-gen/main/java/org/jabref/search" , "-visitor", "-no-listener", "-package", "org.jabref.search", "$projectDir/src/main/antlr4/org/jabref/search/Search.g4"]
}

// Executes org.jabref.cli.JournalListMvGenerator on the main source set's runtime classpath.
// NOTE(review): newer Gradle releases favor `mainClass` over `main` on JavaExec tasks -
// confirm against the Gradle version this build uses.
tasks.register('generateJournalAbbreviationList', JavaExec) {
    main = 'org.jabref.cli.JournalListMvGenerator'
    classpath = sourceSets.main.runtimeClasspath
    description = 'Converts the comma-separated journal abbreviation file to a H2 MVStore'
    group = 'JabRef'
}
// Every `build` invocation also runs the generator.
build.dependsOn('generateJournalAbbreviationList')

tasks.register('generateCitaviSource', XjcTask) {
group = 'JabRef'
Expand All @@ -311,11 +318,6 @@ tasks.register('generateCitaviSource', XjcTask) {
javaPackage = "org.jabref.logic.importer.fileformat.citavi"
}

// Registers a plain task (no type, no actions) that only carries a group and a description.
// NOTE(review): a task with the same name is also registered earlier in this file as a
// JavaExec task; this registration looks like the pre-change definition that the commit
// removes - confirm that only one registration remains in the real build script.
tasks.register('generateJournalAbbreviationList') {
group = 'JabRef'
description = "Converts the comma-separated journal abbreviation file to a H2 MVStore."
}

// Compile all Java sources as UTF-8, whatever the platform default charset is.
tasks.withType(JavaCompile).configureEach { compileTask ->
    compileTask.options.encoding = 'UTF-8'
}
Expand Down
59 changes: 58 additions & 1 deletion src/main/java/org/jabref/cli/JournalListMvGenerator.java
Original file line number Diff line number Diff line change
@@ -1,10 +1,67 @@
package org.jabref.cli;

import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collection;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

import org.jabref.logic.journals.Abbreviation;
import org.jabref.logic.journals.JournalAbbreviationLoader;

import org.h2.mvstore.MVMap;
import org.h2.mvstore.MVStore;
import org.jooq.lambda.Unchecked;

public class JournalListMvGenerator {

public static void main(String[] args) {
public static void main(String[] args) throws IOException {
Path abbreviationsDirectory = Path.of("buildres", "abbrv.jabref.org", "journals");
if (!Files.exists(abbreviationsDirectory)) {
System.out.println("Path " + abbreviationsDirectory.toAbsolutePath() + " does not exist");
System.exit(1);
}
Set<String> ignoredNames = Set.of(
// remove all lists without dot in them:
// we use abbreviation lists containing dots in them only (to be consistent)
"journal_abbreviations_entrez.csv",
"journal_abbreviations_medicus.csv",
"journal_abbreviations_webofscience-dotless.csv",

// we currently do not have good support for BibTeX strings
"journal_abbreviations_ieee_strings.csv"
);

Path journalListMvFile = Path.of("build", "resources", "main", "journals");
Files.deleteIfExists(journalListMvFile);

try (DirectoryStream<Path> stream = Files.newDirectoryStream(abbreviationsDirectory, "*.csv");
MVStore store = MVStore.open(journalListMvFile.toString())) {
MVMap<String, Abbreviation> fullToAbbreviation = store.openMap("FullToAbbreviation");
stream.forEach(Unchecked.consumer(path -> {
String fileName = path.getFileName().toString();
System.out.print("Checking ");
System.out.print(fileName);
if (ignoredNames.contains(fileName)) {
System.out.println(" ignored");
} else {
System.out.println("...");
Collection<Abbreviation> abbreviations = JournalAbbreviationLoader.readAbbreviationsFromCsvFile(path);
Map<String, Abbreviation> abbreviationMap = abbreviations
.stream()
.collect(Collectors.toMap(
Abbreviation::getName,
abbreviation -> abbreviation,
(abbreviation1, abbreviation2) -> {
System.out.println("Double entry " + abbreviation1.getName());
return abbreviation2;
}));
fullToAbbreviation.putAll(abbreviationMap);
}
}));
}
}
}
Binary file removed src/main/resources/journals/journal-list.mv
Binary file not shown.

0 comments on commit 091844b

Please sign in to comment.