From 72c3fd4296cdfbda13511b72d74bc5d29e3d3a82 Mon Sep 17 00:00:00 2001 From: viktor Date: Fri, 9 Aug 2019 16:57:23 +0900 Subject: [PATCH] Enabled parallel processing of simplification. * Decoupled instances of DiscourseSimplifier and (the stateful instances of) DiscourseTreeCreator and DiscourseExtractor. * DiscourseTreeCreator and DiscourseExtractor are now created and used only once for every Simplification. * Refactored the config parsing procedure to produce the list of rules and ignored relations outside of the constructor. This way it only needs to be called once in the beginning. --- .gitignore | 1 + .idea/compiler.xml | 16 ------ .idea/copyright/Lambda___GPL_v3_.xml | 6 --- .idea/copyright/Lambda___MIT_.xml | 6 --- .idea/copyright/profiles_settings.xml | 3 -- .idea/encodings.xml | 6 --- .idea/inspectionProfiles/Project_Default.xml | 6 --- .idea/modules.xml | 8 --- .idea/vcs.xml | 6 --- .../processing/DiscourseSimplifier.java | 19 ++++--- .../DiscourseExtractor.java | 24 ++++++--- .../discourse_tree/DiscourseTreeCreator.java | 29 +++++++--- .../extraction/ExtractionRule.java | 53 ++++++++++--------- .../runner/discourse_tree/model/Leaf.java | 5 ++ 14 files changed, 83 insertions(+), 105 deletions(-) delete mode 100644 .idea/compiler.xml delete mode 100644 .idea/copyright/Lambda___GPL_v3_.xml delete mode 100644 .idea/copyright/Lambda___MIT_.xml delete mode 100644 .idea/copyright/profiles_settings.xml delete mode 100644 .idea/encodings.xml delete mode 100644 .idea/inspectionProfiles/Project_Default.xml delete mode 100644 .idea/modules.xml delete mode 100644 .idea/vcs.xml diff --git a/.gitignore b/.gitignore index 0212d0c..0a72841 100644 --- a/.gitignore +++ b/.gitignore @@ -174,3 +174,4 @@ buildNumber.properties !/.mvn/wrapper/maven-wrapper.jar # End of https://www.gitignore.io/api/intellij,jetbrains,java,maven,eclipse +.idea diff --git a/.idea/compiler.xml b/.idea/compiler.xml deleted file mode 100644 index 4e42293..0000000 --- a/.idea/compiler.xml +++ 
/dev/null @@ -1,16 +0,0 @@ - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/copyright/Lambda___GPL_v3_.xml b/.idea/copyright/Lambda___GPL_v3_.xml deleted file mode 100644 index d201bc0..0000000 --- a/.idea/copyright/Lambda___GPL_v3_.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/copyright/Lambda___MIT_.xml b/.idea/copyright/Lambda___MIT_.xml deleted file mode 100644 index c987fe2..0000000 --- a/.idea/copyright/Lambda___MIT_.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/copyright/profiles_settings.xml b/.idea/copyright/profiles_settings.xml deleted file mode 100644 index ca4cabf..0000000 --- a/.idea/copyright/profiles_settings.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml deleted file mode 100644 index b26911b..0000000 --- a/.idea/encodings.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml deleted file mode 100644 index 6612519..0000000 --- a/.idea/inspectionProfiles/Project_Default.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index 25ec0e9..0000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 94a25f7..0000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/src/main/java/org/lambda3/text/simplification/discourse/processing/DiscourseSimplifier.java b/src/main/java/org/lambda3/text/simplification/discourse/processing/DiscourseSimplifier.java index 4a34555..eea1d2c 100644 --- a/src/main/java/org/lambda3/text/simplification/discourse/processing/DiscourseSimplifier.java +++ 
b/src/main/java/org/lambda3/text/simplification/discourse/processing/DiscourseSimplifier.java @@ -29,6 +29,8 @@ import org.lambda3.text.simplification.discourse.model.SimplificationContent; import org.lambda3.text.simplification.discourse.runner.discourse_extraction.DiscourseExtractor; import org.lambda3.text.simplification.discourse.runner.discourse_tree.DiscourseTreeCreator; +import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation; +import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule; import org.lambda3.text.simplification.discourse.utils.ConfigUtils; import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException; import org.lambda3.text.simplification.discourse.utils.sentences.SentencesUtils; @@ -44,15 +46,16 @@ * */ public class DiscourseSimplifier { - private final DiscourseTreeCreator discourseTreeCreator; - private final DiscourseExtractor discourseExtractor; + private final List ignoredRelations; + private final List extractionRules; + private final SentencePreprocessor preprocessor; private final Logger logger = LoggerFactory.getLogger(getClass()); public DiscourseSimplifier(Config config) { - SentencePreprocessor preprocessor = new SentencePreprocessor(config); - this.discourseTreeCreator = new DiscourseTreeCreator(config, preprocessor); - this.discourseExtractor = new DiscourseExtractor(config); + this.preprocessor = new SentencePreprocessor(config); + this.ignoredRelations = DiscourseExtractor.extractIgnoredRelationsFromConfig(config); + this.extractionRules = DiscourseTreeCreator.extractRulesFromConfig(config); logger.debug("DiscourseSimplifier initialized"); logger.debug("\n{}", ConfigUtils.prettyPrint(config)); @@ -89,7 +92,7 @@ public SimplificationContent doDiscourseSimplification(List sentences, P // creates one discourse discourse_tree over all sentences (investigates intra-sentential and inter-sentential relations) private SimplificationContent 
processWhole(List sentences) { SimplificationContent content = new SimplificationContent(); - + DiscourseTreeCreator discourseTreeCreator = new DiscourseTreeCreator(extractionRules, preprocessor); // Step 1) create document discourse discourse_tree logger.info("### STEP 1) CREATE DOCUMENT DISCOURSE TREE ###"); discourseTreeCreator.reset(); @@ -120,6 +123,8 @@ private SimplificationContent processWhole(List sentences) { // Step 2) do discourse extraction logger.info("### STEP 2) DO DISCOURSE EXTRACTION ###"); + + DiscourseExtractor discourseExtractor = new DiscourseExtractor(ignoredRelations); List elements = discourseExtractor.doDiscourseExtraction(discourseTreeCreator.getDiscourseTree()); elements.forEach(e -> content.addElement(e)); if (logger.isDebugEnabled()) { @@ -133,6 +138,7 @@ private SimplificationContent processWhole(List sentences) { // creates discourse trees for each individual sentence (investigates intra-sentential relations only) private SimplificationContent processSeparate(List sentences) { SimplificationContent content = new SimplificationContent(); + DiscourseTreeCreator discourseTreeCreator = new DiscourseTreeCreator(extractionRules, preprocessor); int idx = 0; for (String sentence : sentences) { @@ -153,6 +159,7 @@ private SimplificationContent processSeparate(List sentences) { // Step 2) do discourse extraction logger.debug("### STEP 2) DO DISCOURSE EXTRACTION ###"); + DiscourseExtractor discourseExtractor = new DiscourseExtractor(ignoredRelations); List elements = discourseExtractor.doDiscourseExtraction(discourseTreeCreator.getDiscourseTree()); elements.forEach(e -> outSentence.addElement(e)); logger.debug(outSentence.toString()); diff --git a/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_extraction/DiscourseExtractor.java b/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_extraction/DiscourseExtractor.java index ddcf0da..a88bf45 100644 --- 
a/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_extraction/DiscourseExtractor.java +++ b/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_extraction/DiscourseExtractor.java @@ -47,15 +47,12 @@ public class DiscourseExtractor { private final Logger logger = LoggerFactory.getLogger(getClass()); private final List ignoredRelations; - private final Config config; private LinkedHashMap processedLeaves; - public DiscourseExtractor(Config config) { - this.config = config; - - // create ignored relations from config - this.ignoredRelations = new ArrayList<>(); - for (String valueName : this.config.getStringList("ignored-relations")) { + public static List extractIgnoredRelationsFromConfig(Config config) { + Logger logger = LoggerFactory.getLogger(DiscourseExtractor.class); + List ignoredRelations = new ArrayList<>(); + for (String valueName : config.getStringList("ignored-relations")) { try { Relation relation = Relation.valueOf(valueName); ignoredRelations.add(relation); @@ -64,10 +61,21 @@ public DiscourseExtractor(Config config) { throw new ConfigException.BadValue("ignored-relations." 
+ valueName, "Failed to create enum value."); } } + return ignoredRelations; + } + public DiscourseExtractor(List ignoredRelations) { + this.ignoredRelations = ignoredRelations; this.processedLeaves = new LinkedHashMap(); } + public DiscourseExtractor(Config config) { + + // create ignored relations from config + this(extractIgnoredRelationsFromConfig(config)); + + } + public List doDiscourseExtraction(DiscourseTree discourseTree) { this.processedLeaves = new LinkedHashMap(); @@ -97,7 +105,7 @@ private void addAsContext(Leaf leaf, Leaf targetLeaf, Relation targetRelation) { private void extractRec(DiscourseTree node, int contextLayer) { if (node instanceof Leaf) { - Leaf leaf = (Leaf)node; + Leaf leaf = (Leaf) node; if (!leaf.isToSimpleContext()) { // create new element diff --git a/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/DiscourseTreeCreator.java b/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/DiscourseTreeCreator.java index 1d2d12a..62d12be 100644 --- a/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/DiscourseTreeCreator.java +++ b/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/DiscourseTreeCreator.java @@ -43,20 +43,17 @@ * */ public class DiscourseTreeCreator { - private final Config config; + //private final Config config; private final SentencePreprocessor preprocessor; private final List rules; private final Logger logger = LoggerFactory.getLogger(getClass()); private Coordination discourseTree; - public DiscourseTreeCreator(Config config, SentencePreprocessor preprocessor) { - this.config = config; - this.preprocessor = preprocessor; - - // create rules from config - this.rules = new ArrayList<>(); - for (String className : this.config.getStringList("rules")) { + public static List extractRulesFromConfig(Config config) { + Logger logger = LoggerFactory.getLogger(DiscourseTreeCreator.class); + List rules = new ArrayList<>(); + 
for (String className : config.getStringList("rules")) { try { Class clazz = Class.forName(className); Constructor constructor = clazz.getConstructor(); @@ -69,7 +66,23 @@ public DiscourseTreeCreator(Config config, SentencePreprocessor preprocessor) { throw new ConfigException.BadValue("rules." + className, "Failed to create instance."); } } + return rules; + } + + public DiscourseTreeCreator(Config config, SentencePreprocessor preprocessor) { + //this.config = config; + //this.preprocessor = preprocessor; + + // create rules from config + //this.rules = ; + this(DiscourseTreeCreator.extractRulesFromConfig(config), preprocessor); + reset(); + } + + public DiscourseTreeCreator(List rules, SentencePreprocessor preprocessor) { + this.preprocessor = preprocessor; + this.rules = rules; reset(); } diff --git a/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/ExtractionRule.java b/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/ExtractionRule.java index 99880d4..29e6322 100644 --- a/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/ExtractionRule.java +++ b/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/ExtractionRule.java @@ -64,6 +64,7 @@ public void setConfig(Config config) { this.classifer = new CuePhraseClassifier(config); } + public abstract Optional extract(Leaf leaf) throws ParseTreeException; protected static List getSiblings(Tree parseTree, List tags) { @@ -186,49 +187,49 @@ protected static List rephraseEnablement(Tree s, Tree vp) { return res; } - + protected static String rephraseApposition(Tree vp, String np) { String res = ""; Tense tense = getTense(vp); //Number number = getNumber(np); if (tense.equals(Tense.PRESENT)) { - if (np.equals("NN") || np.equals("NNP")) { - res = " is "; - } else { - res = " are "; - } + if (np.equals("NN") || np.equals("NNP")) { + res = " is "; + } else { + res = " are 
"; + } } else { - if (np.equals("NN") || np.equals("NNP")) { - res = " was "; - } else { - res = " were "; - } + if (np.equals("NN") || np.equals("NNP")) { + res = " was "; + } else { + res = " were "; + } } - + return res; } - + protected static List rephraseAppositionNonRes(Tree vp, Tree np, Tree np2) { List res = new ArrayList<>(); Tense tense = getTense(vp); Number number = getNumber(np); if (tense.equals(Tense.PRESENT)) { - if (number.equals(Number.SINGULAR)) { - res.add(new Word("is")); - } else { - res.add(new Word("are")); - } + if (number.equals(Number.SINGULAR)) { + res.add(new Word("is")); + } else { + res.add(new Word("are")); + } } else { - if (number.equals(Number.SINGULAR)) { - res.add(new Word("was")); - } else { - res.add(new Word("were")); - } + if (number.equals(Number.SINGULAR)) { + res.add(new Word("was")); + } else { + res.add(new Word("were")); + } } res = appendWordsFromTree(res, np2); - + return res; } @@ -245,7 +246,7 @@ protected static List getRephrasedParticipalS(Tree np, Tree vp, Tree s, Tr if (matcher.findAt(s)) { List res = new ArrayList<>(); - res.add(new Word((number.equals(Number.SINGULAR))? "has" : "have")); + res.add(new Word((number.equals(Number.SINGULAR)) ? "has" : "have")); res.add(new Word("been")); List next = ParseTreeExtractionUtils.getFollowingWords(s, matcher.getNode("vbn"), true); if (next.size() > 0) { @@ -260,7 +261,7 @@ protected static List getRephrasedParticipalS(Tree np, Tree vp, Tree s, Tr if (matcher.findAt(s)) { List res = new ArrayList<>(); - res.add(new Word((number.equals(Number.SINGULAR))? "has" : "have")); + res.add(new Word((number.equals(Number.SINGULAR)) ? 
"has" : "have")); List next = ParseTreeExtractionUtils.getFollowingWords(s, matcher.getNode("vbn"), true); if (next.size() > 0) { next.set(0, WordsUtils.lowercaseWord(next.get(0))); diff --git a/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/model/Leaf.java b/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/model/Leaf.java index 62a3027..154dc54 100644 --- a/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/model/Leaf.java +++ b/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/model/Leaf.java @@ -57,6 +57,10 @@ public Leaf(String extractionRule, String text) throws ParseTreeException { this(extractionRule, ParseTreeParser.parse(text)); } +// public void saveTree() { +// +// } + public void dontAllowSplit() { this.allowSplit = false; } @@ -73,6 +77,7 @@ public String getText() { return WordsUtils.wordsToString(ParseTreeExtractionUtils.getContainingWords(parseTree)); } + public void setToSimpleContext(boolean toSimpleContext) { this.toSimpleContext = toSimpleContext; }