From 7bbca7cc5e84eeacd94a769c9c1be132e1d7cd57 Mon Sep 17 00:00:00 2001 From: bkis Date: Mon, 13 Dec 2021 11:09:49 +0100 Subject: [PATCH 1/2] Implement CSV export for ExportMultipleFiles tool closes #370 --- .../annotator/plugin/csv/CsvExportPlugin.java | 123 +++++++++++++----- .../coref/annotator/plugin/csv/Defaults.java | 1 + .../annotator/tools/ExportMultipleFiles.java | 18 ++- 3 files changed, 106 insertions(+), 36 deletions(-) diff --git a/src/main/java/de/unistuttgart/ims/coref/annotator/plugin/csv/CsvExportPlugin.java b/src/main/java/de/unistuttgart/ims/coref/annotator/plugin/csv/CsvExportPlugin.java index 2985528c..40d2a18a 100644 --- a/src/main/java/de/unistuttgart/ims/coref/annotator/plugin/csv/CsvExportPlugin.java +++ b/src/main/java/de/unistuttgart/ims/coref/annotator/plugin/csv/CsvExportPlugin.java @@ -32,8 +32,12 @@ import de.unistuttgart.ims.coref.annotator.plugins.UimaExportPlugin; import javafx.stage.FileChooser.ExtensionFilter; -public class CsvExportPlugin extends AbstractExportPlugin implements UimaExportPlugin, ConfigurableExportPlugin { +public class CsvExportPlugin + extends AbstractExportPlugin + implements UimaExportPlugin, ConfigurableExportPlugin { + + public static enum ContextUnit { CHARACTER, TOKEN, LINE; @@ -66,13 +70,26 @@ public AnalysisEngineDescription getExporter() throws ResourceInitializationExce @Override public AnalysisEngineDescription getWriter(File f) throws ResourceInitializationException { + AggregateBuilder b = new AggregateBuilder(); - b.add(AnalysisEngineFactory.createEngineDescription(CSVWriter.class, CSVWriter.PARAM_FILE, f.getAbsolutePath(), - CSVWriter.PARAM_CONTEXTWIDTH, getOptionContextWidth(), CSVWriter.PARAM_REPLACE_NEWLINES, - isOptionReplaceNewlines(), CSVWriter.PARAM_TRIM_WHITESPACE, isOptionTrimWhitespace(), - CSVWriter.PARAM_CONTEXT_UNIT, getOptionContextUnit(), CSVWriter.PARAM_INCLUDE_LINE_NUMBERS, - Annotator.app.getPreferences().getBoolean(Constants.PLUGIN_CSV_INCLUDE_LINE_NUMBERS, - Defaults.CFG_OPTION_INCLUDE_LINE_NUMBERS))); + b.add(AnalysisEngineFactory.createEngineDescription( + CSVWriter.class, + CSVWriter.PARAM_FILE, + f.getAbsolutePath(), + CSVWriter.PARAM_CONTEXTWIDTH, + getOptionContextWidth(), + CSVWriter.PARAM_REPLACE_NEWLINES, + isOptionReplaceNewlines(), + CSVWriter.PARAM_TRIM_WHITESPACE, + isOptionTrimWhitespace(), + CSVWriter.PARAM_CONTEXT_UNIT, + getOptionContextUnit(), + CSVWriter.PARAM_INCLUDE_LINE_NUMBERS, + Annotator.app != null + ? Annotator.app.getPreferences().getBoolean( + Constants.PLUGIN_CSV_INCLUDE_LINE_NUMBERS, + Defaults.CFG_OPTION_INCLUDE_LINE_NUMBERS) + : Defaults.CFG_OPTION_INCLUDE_LINE_NUMBERS)); return b.createAggregateDescription(); } @@ -104,57 +121,101 @@ public ExtensionFilter getExtensionFilter() { } @Override - public void showExportConfigurationDialog(JFrame parent, DocumentModel documentModel, + public void showExportConfigurationDialog( + JFrame parent, + DocumentModel documentModel, Consumer callback) { ImmutableList options = Lists.immutable.of( - new PluginOption.IntegerPluginOption(Annotator.app.getPreferences(), Constants.PLUGIN_CSV_CONTEXT_WIDTH, - Defaults.CFG_OPTION_CONTEXT_WIDTH, "dialog.export_options.context_width", - "dialog.export_options.context_width.tooltip", 0, 500, 25), - (PluginOption) new PluginOption.EnumPluginOption(ContextUnit.class, - Annotator.app.getPreferences(), Constants.PLUGIN_CSV_CONTEXT_UNIT, - Defaults.CFG_OPTION_CONTEXT_UNIT, "dialog.export_options.context_unit", - "dialog.export_options.context_unit.tooltip", Lists.immutable.of(ContextUnit.values()) - .select(cu -> cu.isPossible(documentModel.getJcas())).toArray(new ContextUnit[] {}), + new PluginOption.IntegerPluginOption( + Annotator.app.getPreferences(), + Constants.PLUGIN_CSV_CONTEXT_WIDTH, + Defaults.CFG_OPTION_CONTEXT_WIDTH, + "dialog.export_options.context_width", + "dialog.export_options.context_width.tooltip", + 0, + 500, + 25), + (PluginOption) new PluginOption.EnumPluginOption( + ContextUnit.class, + Annotator.app.getPreferences(), + Constants.PLUGIN_CSV_CONTEXT_UNIT, + Defaults.CFG_OPTION_CONTEXT_UNIT, + "dialog.export_options.context_unit", + "dialog.export_options.context_unit.tooltip", + Lists.immutable.of(ContextUnit.values()) + .select(cu -> cu.isPossible( + documentModel.getJcas())) + .toArray(new ContextUnit[] {}), new DefaultListCellRenderer() { private static final long serialVersionUID = 1L; @Override - public Component getListCellRendererComponent(JList list, Object value, int index, - boolean isSelected, boolean cellHasFocus) { - super.getListCellRendererComponent(list, value, index, isSelected, cellHasFocus); - setText(Annotator.getString("dialog.export_options.context_unit." + value.toString())); + public Component getListCellRendererComponent( + JList list, + Object value, + int index, + boolean isSelected, + boolean cellHasFocus) { + super.getListCellRendererComponent( + list, value, index, isSelected, cellHasFocus); + setText(Annotator.getString( + "dialog.export_options.context_unit." + value.toString())); return this; } }), - new BooleanPluginOption(Annotator.app.getPreferences(), Constants.PLUGIN_CSV_TRIM, - Defaults.CFG_OPTION_TRIM, "dialog.export_options.trim_whitespace", + new BooleanPluginOption( + Annotator.app.getPreferences(), + Constants.PLUGIN_CSV_TRIM, + Defaults.CFG_OPTION_TRIM, + "dialog.export_options.trim_whitespace", "dialog.export_options.trim_whitespace.tooltip"), - new BooleanPluginOption(Annotator.app.getPreferences(), Constants.PLUGIN_CSV_REPLACE_NEWLINES, - Defaults.CFG_OPTION_REPLACE_NEWLINES, "dialog.export_options.replace_newline", + new BooleanPluginOption( + Annotator.app.getPreferences(), + Constants.PLUGIN_CSV_REPLACE_NEWLINES, + Defaults.CFG_OPTION_REPLACE_NEWLINES, + "dialog.export_options.replace_newline", "dialog.export_options.replace_newline.tooltip"), - new BooleanPluginOption(Annotator.app.getPreferences(), Constants.PLUGIN_CSV_INCLUDE_LINE_NUMBERS, - Defaults.CFG_OPTION_INCLUDE_LINE_NUMBERS, "dialog.export_options.include_line_numbers", + new BooleanPluginOption( + Annotator.app.getPreferences(), + Constants.PLUGIN_CSV_INCLUDE_LINE_NUMBERS, + Defaults.CFG_OPTION_INCLUDE_LINE_NUMBERS, + "dialog.export_options.include_line_numbers", "dialog.export_options.include_line_numbers.tooltip")); new PluginConfigurationDialog(parent, this, callback, options).setVisible(true); } public int getOptionContextWidth() { - return Annotator.app.getPreferences().getInt((Constants.PLUGIN_CSV_CONTEXT_WIDTH), 30); + return Annotator.app != null + ? Annotator.app.getPreferences().getInt( + Constants.PLUGIN_CSV_CONTEXT_WIDTH, + Defaults.CFG_OPTION_CONTEXT_WIDTH) + : Defaults.CFG_OPTION_CONTEXT_WIDTH; } public boolean isOptionTrimWhitespace() { - return Annotator.app.getPreferences().getBoolean((Constants.PLUGIN_CSV_TRIM), true); + return Annotator.app != null + ? Annotator.app.getPreferences().getBoolean( + Constants.PLUGIN_CSV_TRIM, + Defaults.CFG_OPTION_TRIM) + : Defaults.CFG_OPTION_TRIM; } public boolean isOptionReplaceNewlines() { - return Annotator.app.getPreferences().getBoolean((Constants.PLUGIN_CSV_REPLACE_NEWLINES), true); + return Annotator.app != null + ? Annotator.app.getPreferences().getBoolean( + Constants.PLUGIN_CSV_REPLACE_NEWLINES, + Defaults.CFG_OPTION_REPLACE_NEWLINES) + : Defaults.CFG_OPTION_REPLACE_NEWLINES; } public ContextUnit getOptionContextUnit() { - return ContextUnit.valueOf( - Annotator.app.getPreferences().get((Constants.PLUGIN_CSV_CONTEXT_UNIT), ContextUnit.CHARACTER.name())); + return Annotator.app != null + ? ContextUnit.valueOf(Annotator.app.getPreferences().get( + Constants.PLUGIN_CSV_CONTEXT_UNIT, + Defaults.CFG_OPTION_CONTEXT_UNIT.name())) + : ContextUnit.valueOf(Defaults.CFG_OPTION_CONTEXT_UNIT.name()); } } diff --git a/src/main/java/de/unistuttgart/ims/coref/annotator/plugin/csv/Defaults.java b/src/main/java/de/unistuttgart/ims/coref/annotator/plugin/csv/Defaults.java index 85696248..7caa6824 100644 --- a/src/main/java/de/unistuttgart/ims/coref/annotator/plugin/csv/Defaults.java +++ b/src/main/java/de/unistuttgart/ims/coref/annotator/plugin/csv/Defaults.java @@ -3,6 +3,7 @@ import de.unistuttgart.ims.coref.annotator.plugin.csv.CsvExportPlugin.ContextUnit; public class Defaults { + public static final int CFG_OPTION_CONTEXT_WIDTH = 30; public static final ContextUnit CFG_OPTION_CONTEXT_UNIT = ContextUnit.CHARACTER; public static final boolean CFG_OPTION_TRIM = true; diff --git a/src/main/java/de/unistuttgart/ims/coref/annotator/tools/ExportMultipleFiles.java b/src/main/java/de/unistuttgart/ims/coref/annotator/tools/ExportMultipleFiles.java index c129cfa3..9e207136 100644 --- a/src/main/java/de/unistuttgart/ims/coref/annotator/tools/ExportMultipleFiles.java +++ b/src/main/java/de/unistuttgart/ims/coref/annotator/tools/ExportMultipleFiles.java @@ -36,13 +36,12 @@ * */ public class ExportMultipleFiles { + static Options options; - static ExportPlugin outputPlugin; - static Pattern filenamePattern = Pattern.compile("^(.*)\\.xmi(\\.gz)?"); - static PluginManager pluginManager; + public static void main(String[] args) throws ResourceInitializationException, ClassNotFoundException, InterruptedException, ExecutionException { @@ -80,6 +79,7 @@ public boolean accept(File pathname) { } } + /** * This function processes a single file. * @@ -123,9 +123,12 @@ public static void convertFile(File file, Options options) throws InterruptedExc w.get(); } + public enum OutputFormat { - tei, conll2012, json, qdtei, stats; + + tei, conll2012, json, csv, qdtei, stats; + @SuppressWarnings("unchecked") Class getPluginClass() { switch (this) { case stats: @@ -136,6 +139,8 @@ Class getPluginClass() { return de.unistuttgart.ims.coref.annotator.plugin.json.Plugin.class; case tei: return de.unistuttgart.ims.coref.annotator.plugin.tei.TeiExportPlugin.class; + case csv: + return de.unistuttgart.ims.coref.annotator.plugin.csv.CsvExportPlugin.class; case qdtei: // This is a temporary workaround try { @@ -152,16 +157,18 @@ Class getPluginClass() { } + public enum OutputFilename { input, documentId } + @CommandLineInterface(application = "ExportMultipleFiles") public interface Options { @Option(description = "Input file or directory.", shortName = "i") List getInput(); - @Option(defaultValue = "tei", description = "Target format. One of [tei, conll2012, json].") + @Option(defaultValue = "tei", description = "Target format. One of [tei, conll2012, json, csv, stats].") OutputFormat getOutputFormat(); @Option(defaultValue = ".", description = "Output directory. Defaults to current.", shortName = "o") @@ -173,4 +180,5 @@ public interface Options { @Option(helpRequest = true, shortName = "h", description = "Show help") boolean getHelp(); } + } From 3174063f32fe496c1eec769b9384b549575ff48f Mon Sep 17 00:00:00 2001 From: bkis Date: Thu, 16 Dec 2021 12:10:44 +0100 Subject: [PATCH 2/2] Expand input file name pattern in ExportMultipleFiles ... to properly recognize `.ca2z` files. Output file naming by `input` was broken because of this. --- .../ims/coref/annotator/tools/ExportMultipleFiles.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/de/unistuttgart/ims/coref/annotator/tools/ExportMultipleFiles.java b/src/main/java/de/unistuttgart/ims/coref/annotator/tools/ExportMultipleFiles.java index 9e207136..728c2176 100644 --- a/src/main/java/de/unistuttgart/ims/coref/annotator/tools/ExportMultipleFiles.java +++ b/src/main/java/de/unistuttgart/ims/coref/annotator/tools/ExportMultipleFiles.java @@ -39,7 +39,7 @@ public class ExportMultipleFiles { static Options options; static ExportPlugin outputPlugin; - static Pattern filenamePattern = Pattern.compile("^(.*)\\.xmi(\\.gz)?"); + static Pattern filenamePattern = Pattern.compile("^(.*?)\\.(?:xmi(?:\\.gz)?|ca2z)"); static PluginManager pluginManager; @@ -97,7 +97,7 @@ public static void convertFile(File file, Options options) throws InterruptedExc if (m.find()) { namePart = m.group(1); } - + // load jcas from file JCasLoader loader = new JCasLoader(file, pluginManager.getDefaultIOPlugin(), "xx-unspecified", null, ex -> { ex.printStackTrace();