Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enabled parallel processing of simplification. #14

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -174,3 +174,4 @@ buildNumber.properties
!/.mvn/wrapper/maven-wrapper.jar

# End of https://www.gitignore.io/api/intellij,jetbrains,java,maven,eclipse
.idea
16 changes: 0 additions & 16 deletions .idea/compiler.xml

This file was deleted.

6 changes: 0 additions & 6 deletions .idea/copyright/Lambda___GPL_v3_.xml

This file was deleted.

6 changes: 0 additions & 6 deletions .idea/copyright/Lambda___MIT_.xml

This file was deleted.

3 changes: 0 additions & 3 deletions .idea/copyright/profiles_settings.xml

This file was deleted.

6 changes: 0 additions & 6 deletions .idea/encodings.xml

This file was deleted.

6 changes: 0 additions & 6 deletions .idea/inspectionProfiles/Project_Default.xml

This file was deleted.

8 changes: 0 additions & 8 deletions .idea/modules.xml

This file was deleted.

6 changes: 0 additions & 6 deletions .idea/vcs.xml

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
import org.lambda3.text.simplification.discourse.model.SimplificationContent;
import org.lambda3.text.simplification.discourse.runner.discourse_extraction.DiscourseExtractor;
import org.lambda3.text.simplification.discourse.runner.discourse_tree.DiscourseTreeCreator;
import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
import org.lambda3.text.simplification.discourse.utils.ConfigUtils;
import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
import org.lambda3.text.simplification.discourse.utils.sentences.SentencesUtils;
Expand All @@ -44,15 +46,16 @@
*
*/
public class DiscourseSimplifier {
private final DiscourseTreeCreator discourseTreeCreator;
private final DiscourseExtractor discourseExtractor;
private final List<Relation> ignoredRelations;
private final List<ExtractionRule> extractionRules;
private final SentencePreprocessor preprocessor;

private final Logger logger = LoggerFactory.getLogger(getClass());

public DiscourseSimplifier(Config config) {
SentencePreprocessor preprocessor = new SentencePreprocessor(config);
this.discourseTreeCreator = new DiscourseTreeCreator(config, preprocessor);
this.discourseExtractor = new DiscourseExtractor(config);
this.preprocessor = new SentencePreprocessor(config);
this.ignoredRelations = DiscourseExtractor.extractIgnoredRelationsFromConfig(config);
this.extractionRules = DiscourseTreeCreator.extractRulesFromConfig(config);

logger.debug("DiscourseSimplifier initialized");
logger.debug("\n{}", ConfigUtils.prettyPrint(config));
Expand Down Expand Up @@ -89,7 +92,7 @@ public SimplificationContent doDiscourseSimplification(List<String> sentences, P
// creates one discourse tree over all sentences (investigates intra-sentential and inter-sentential relations)
private SimplificationContent processWhole(List<String> sentences) {
SimplificationContent content = new SimplificationContent();

DiscourseTreeCreator discourseTreeCreator = new DiscourseTreeCreator(extractionRules, preprocessor);
// Step 1) create document discourse tree
logger.info("### STEP 1) CREATE DOCUMENT DISCOURSE TREE ###");
discourseTreeCreator.reset();
Expand Down Expand Up @@ -120,6 +123,8 @@ private SimplificationContent processWhole(List<String> sentences) {

// Step 2) do discourse extraction
logger.info("### STEP 2) DO DISCOURSE EXTRACTION ###");

DiscourseExtractor discourseExtractor = new DiscourseExtractor(ignoredRelations);
List<Element> elements = discourseExtractor.doDiscourseExtraction(discourseTreeCreator.getDiscourseTree());
elements.forEach(e -> content.addElement(e));
if (logger.isDebugEnabled()) {
Expand All @@ -133,6 +138,7 @@ private SimplificationContent processWhole(List<String> sentences) {
// creates discourse trees for each individual sentence (investigates intra-sentential relations only)
private SimplificationContent processSeparate(List<String> sentences) {
SimplificationContent content = new SimplificationContent();
DiscourseTreeCreator discourseTreeCreator = new DiscourseTreeCreator(extractionRules, preprocessor);

int idx = 0;
for (String sentence : sentences) {
Expand All @@ -153,6 +159,7 @@ private SimplificationContent processSeparate(List<String> sentences) {

// Step 2) do discourse extraction
logger.debug("### STEP 2) DO DISCOURSE EXTRACTION ###");
DiscourseExtractor discourseExtractor = new DiscourseExtractor(ignoredRelations);
List<Element> elements = discourseExtractor.doDiscourseExtraction(discourseTreeCreator.getDiscourseTree());
elements.forEach(e -> outSentence.addElement(e));
logger.debug(outSentence.toString());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,12 @@ public class DiscourseExtractor {
private final Logger logger = LoggerFactory.getLogger(getClass());
private final List<Relation> ignoredRelations;

private final Config config;
private LinkedHashMap<Leaf, Element> processedLeaves;

public DiscourseExtractor(Config config) {
this.config = config;

// create ignored relations from config
this.ignoredRelations = new ArrayList<>();
for (String valueName : this.config.getStringList("ignored-relations")) {
public static List<Relation> extractIgnoredRelationsFromConfig(Config config) {
Logger logger = LoggerFactory.getLogger(DiscourseExtractor.class);
List<Relation> ignoredRelations = new ArrayList<>();
for (String valueName : config.getStringList("ignored-relations")) {
try {
Relation relation = Relation.valueOf(valueName);
ignoredRelations.add(relation);
Expand All @@ -64,10 +61,21 @@ public DiscourseExtractor(Config config) {
throw new ConfigException.BadValue("ignored-relations." + valueName, "Failed to create enum value.");
}
}
return ignoredRelations;
}

/**
 * Creates an extractor from an already-built list of ignored relations.
 *
 * @param ignoredRelations the relations to store as ignored; the list is kept by
 *                         reference, not copied
 */
public DiscourseExtractor(List<Relation> ignoredRelations) {
    processedLeaves = new LinkedHashMap<>();
    this.ignoredRelations = ignoredRelations;
}

/**
 * Creates an extractor whose ignored relations are read from the given configuration
 * through {@link #extractIgnoredRelationsFromConfig(Config)}.
 *
 * @param config the configuration to read the ignored relations from
 */
public DiscourseExtractor(Config config) {
    this(extractIgnoredRelationsFromConfig(config));
}

public List<Element> doDiscourseExtraction(DiscourseTree discourseTree) {
this.processedLeaves = new LinkedHashMap<Leaf, Element>();

Expand Down Expand Up @@ -97,7 +105,7 @@ private void addAsContext(Leaf leaf, Leaf targetLeaf, Relation targetRelation) {
private void extractRec(DiscourseTree node, int contextLayer) {

if (node instanceof Leaf) {
Leaf leaf = (Leaf)node;
Leaf leaf = (Leaf) node;
if (!leaf.isToSimpleContext()) {

// create new element
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,20 +43,17 @@
*
*/
public class DiscourseTreeCreator {
private final Config config;
//private final Config config;
private final SentencePreprocessor preprocessor;
private final List<ExtractionRule> rules;

private final Logger logger = LoggerFactory.getLogger(getClass());
private Coordination discourseTree;

public DiscourseTreeCreator(Config config, SentencePreprocessor preprocessor) {
this.config = config;
this.preprocessor = preprocessor;

// create rules from config
this.rules = new ArrayList<>();
for (String className : this.config.getStringList("rules")) {
public static List<ExtractionRule> extractRulesFromConfig(Config config) {
Logger logger = LoggerFactory.getLogger(DiscourseTreeCreator.class);
List<ExtractionRule> rules = new ArrayList<>();
for (String className : config.getStringList("rules")) {
try {
Class<?> clazz = Class.forName(className);
Constructor<?> constructor = clazz.getConstructor();
Expand All @@ -69,7 +66,23 @@ public DiscourseTreeCreator(Config config, SentencePreprocessor preprocessor) {
throw new ConfigException.BadValue("rules." + className, "Failed to create instance.");
}
}
return rules;
}


/**
 * Creates a tree creator whose extraction rules are built from the given configuration.
 *
 * @param config       configuration handed to {@code extractRulesFromConfig}
 * @param preprocessor sentence preprocessor passed on to the rule-list constructor
 */
public DiscourseTreeCreator(Config config, SentencePreprocessor preprocessor) {
    // The rule-list constructor already finishes with reset(); a second call here
    // would only repeat that work, so it is not issued again.
    this(extractRulesFromConfig(config), preprocessor);
}

/**
 * Creates a tree creator from ready-made extraction rules and then calls
 * {@code reset()}.
 *
 * @param rules        the extraction rules to store; the list is kept by reference
 * @param preprocessor the sentence preprocessor to store
 */
public DiscourseTreeCreator(List<ExtractionRule> rules, SentencePreprocessor preprocessor) {
    this.rules = rules;
    this.preprocessor = preprocessor;
    reset();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ public void setConfig(Config config) {
this.classifer = new CuePhraseClassifier(config);
}


/**
 * Attempts an extraction on the given leaf.
 *
 * @param leaf the leaf to examine
 * @return the extraction wrapped in an {@code Optional}; presumably empty when the
 *         rule does not apply to the leaf (confirm against the implementations)
 * @throws ParseTreeException declared so that implementations can report parse-tree failures
 */
public abstract Optional<Extraction> extract(Leaf leaf) throws ParseTreeException;

protected static List<Tree> getSiblings(Tree parseTree, List<String> tags) {
Expand Down Expand Up @@ -186,49 +187,49 @@ protected static List<Word> rephraseEnablement(Tree s, Tree vp) {
return res;
}


/**
 * Chooses the form of "to be" used to rephrase an apposition.
 *
 * The tense is taken from {@code getTense(vp)}; the tag strings {@code "NN"} and
 * {@code "NNP"} select the singular form, any other tag the plural form.
 *
 * @param vp the tree whose tense decides between present and past forms
 * @param np the tag string compared against {@code "NN"} and {@code "NNP"}
 * @return one of {@code " is "}, {@code " are "}, {@code " was "} or {@code " were "},
 *         including the surrounding spaces
 */
protected static String rephraseApposition(Tree vp, String np) {
    Tense tense = getTense(vp);
    boolean singular = np.equals("NN") || np.equals("NNP");

    // One decision per tense; the number check is done once instead of being repeated.
    if (tense.equals(Tense.PRESENT)) {
        return singular ? " is " : " are ";
    }
    return singular ? " was " : " were ";
}

/**
 * Builds the word list that rephrases a non-restrictive apposition: a single form of
 * "to be" followed by the words appended from {@code np2}.
 *
 * The tense comes from {@code getTense(vp)} and the number from {@code getNumber(np)}.
 *
 * @param vp  the tree whose tense decides between present and past forms
 * @param np  the tree whose number decides between singular and plural forms
 * @param np2 the tree whose words are appended after the verb via {@code appendWordsFromTree}
 * @return the verb ("is", "are", "was" or "were") followed by whatever
 *         {@code appendWordsFromTree} adds for {@code np2}
 */
protected static List<Word> rephraseAppositionNonRes(Tree vp, Tree np, Tree np2) {
    List<Word> res = new ArrayList<>();

    Tense tense = getTense(vp);
    Number number = getNumber(np);
    boolean singular = number.equals(Number.SINGULAR);

    // Exactly one auxiliary is added; adding it in two places would duplicate the verb.
    String verb;
    if (tense.equals(Tense.PRESENT)) {
        verb = singular ? "is" : "are";
    } else {
        verb = singular ? "was" : "were";
    }
    res.add(new Word(verb));

    return appendWordsFromTree(res, np2);
}

Expand All @@ -245,7 +246,7 @@ protected static List<Word> getRephrasedParticipalS(Tree np, Tree vp, Tree s, Tr
if (matcher.findAt(s)) {
List<Word> res = new ArrayList<>();

res.add(new Word((number.equals(Number.SINGULAR))? "has" : "have"));
res.add(new Word((number.equals(Number.SINGULAR)) ? "has" : "have"));
res.add(new Word("been"));
List<Word> next = ParseTreeExtractionUtils.getFollowingWords(s, matcher.getNode("vbn"), true);
if (next.size() > 0) {
Expand All @@ -260,7 +261,7 @@ protected static List<Word> getRephrasedParticipalS(Tree np, Tree vp, Tree s, Tr
if (matcher.findAt(s)) {
List<Word> res = new ArrayList<>();

res.add(new Word((number.equals(Number.SINGULAR))? "has" : "have"));
res.add(new Word((number.equals(Number.SINGULAR)) ? "has" : "have"));
List<Word> next = ParseTreeExtractionUtils.getFollowingWords(s, matcher.getNode("vbn"), true);
if (next.size() > 0) {
next.set(0, WordsUtils.lowercaseWord(next.get(0)));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ public Leaf(String extractionRule, String text) throws ParseTreeException {
this(extractionRule, ParseTreeParser.parse(text));
}

// public void saveTree() {
//
// }

/**
 * Clears this leaf's {@code allowSplit} flag.
 */
public void dontAllowSplit() {
    allowSplit = false;
}
Expand All @@ -73,6 +77,7 @@ public String getText() {
return WordsUtils.wordsToString(ParseTreeExtractionUtils.getContainingWords(parseTree));
}


/**
 * Sets this leaf's {@code toSimpleContext} flag.
 *
 * @param toSimpleContext the new value of the flag
 */
public void setToSimpleContext(boolean toSimpleContext) {
this.toSimpleContext = toSimpleContext;
}
Expand Down