Skip to content

Commit

Permalink
issue #137: parse boolean strings
Browse files Browse the repository at this point in the history
  • Loading branch information
pkiraly committed Jun 22, 2022
1 parent 0d2ecd5 commit 4cfba9d
Show file tree
Hide file tree
Showing 8 changed files with 176 additions and 90 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
package de.gwdg.metadataqa.marc.cli.utils.ignorablerecords;

import de.gwdg.metadataqa.marc.dao.MarcRecord;
import de.gwdg.metadataqa.marc.utils.parser.BooleanContainer;
import de.gwdg.metadataqa.marc.utils.parser.BooleanParser;
import de.gwdg.metadataqa.marc.utils.pica.path.PicaPath;
import de.gwdg.metadataqa.marc.utils.pica.path.PicaPathParser;
import org.apache.commons.lang3.StringUtils;

import java.util.ArrayList;
import java.util.List;
Expand All @@ -12,13 +16,26 @@ public class PicaFilter {
// Matches a single raw criterium. Group 1 is the path: it starts with 0, 1, 2 or '.',
// followed by letters, digits or any of @ . / $ * -. Group 2 is either an operator
// (==, !=, =~, !~, =^, =$; group 3) with a single-quoted, non-empty value (group 4),
// or a lone '?'.
protected static final Pattern CRITERIUM = Pattern.compile("^([012\\.][A-Za-z0-9@\\./\\$\\*\\-]+?)(\\s*(==|!=|=~|!~|=\\^|=\\$)\\s*'([^']+)'|\\?)$");

// Flat list of criteria, returned by getCriteria().
protected List<CriteriumPica> criteria = new ArrayList<>();
// Root of the boolean criteria tree, returned by getBooleanCriteria(); null until assigned.
protected BooleanContainer<CriteriumPica> booleanCriteria;

protected void parse(String ignorableRecordsInput) {
String[] rawCriteria = ignorableRecordsInput.split(",");
for (String rawCriterium : rawCriteria) {
if (!rawCriterium.isEmpty())
criteria.add(parseCriterium(rawCriterium));

if (StringUtils.isNotBlank(ignorableRecordsInput)) {
booleanCriteria = transformContainer(BooleanParser.parse(ignorableRecordsInput));
}
}

/**
 * Converts a tree of raw criterium strings into a tree of parsed criteria.
 *
 * The operator is copied as is. A node that carries a value is turned into a leaf by
 * parsing that value with {@code parseCriterium}; otherwise every child is converted
 * recursively, keeping the original order.
 *
 * @param booleanCriteria the string based tree to convert
 * @return a new tree with the same shape holding {@code CriteriumPica} values
 */
private BooleanContainer<CriteriumPica> transformContainer(BooleanContainer<String> booleanCriteria) {
  BooleanContainer<CriteriumPica> container = new BooleanContainer<>();
  container.setOp(booleanCriteria.getOp());
  if (booleanCriteria.getValue() != null) {
    container.setValue(parseCriterium(booleanCriteria.getValue()));
  } else {
    // typed loop variable: avoids the raw type and the unchecked recursive call
    for (BooleanContainer<String> child : booleanCriteria.getChildren()) {
      container.getChildren().add(transformContainer(child));
    }
  }
  return container;
}

protected CriteriumPica parseCriterium(String rawCriterium) {
Expand All @@ -45,10 +62,42 @@ public List<CriteriumPica> getCriteria() {
return criteria;
}

/**
 * Gives access to the root of the boolean criteria tree.
 *
 * @return the criteria tree, or {@code null} if none has been assigned
 */
public BooleanContainer<CriteriumPica> getBooleanCriteria() {
  return this.booleanCriteria;
}

/**
 * Evaluates a boolean criteria tree against a record.
 *
 * A node with a value is a leaf and is decided by that criterium alone. Otherwise the
 * children are evaluated in order with short-circuiting: an OR group passes as soon as
 * one child passes, an AND group fails as soon as one child fails. A group without any
 * children never passes.
 *
 * @param marcRecord the record to test
 * @param criteria the (sub)tree to evaluate, must not be {@code null}
 * @return {@code true} if the record satisfies the tree
 */
public boolean metCriteria(MarcRecord marcRecord, BooleanContainer<CriteriumPica> criteria) {
  if (criteria.getValue() != null)
    return criteria.getValue().met(marcRecord);

  // Enum comparison with == is null-safe. The operator is null when the container only
  // wraps a parenthesised group (the parser sets it only on encountering && or ||);
  // such a group is evaluated as AND instead of failing with a NullPointerException.
  boolean isOr = criteria.getOp() == BooleanContainer.Op.OR;
  boolean hasChildren = false;
  for (BooleanContainer<CriteriumPica> child : criteria.getChildren()) {
    hasChildren = true;
    boolean childPassed = metCriteria(marcRecord, child);
    if (isOr && childPassed)
      return true;
    if (!isOr && !childPassed)
      return false;
  }
  // Reaching this point means: OR -> no child passed; AND -> no child failed.
  return !isOr && hasChildren;
}

@Override
public String toString() {
return "PicaFilter{" +
"criteria=" + criteria +
"criteria=" + booleanCriteria +
'}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,14 @@ public RecordFilterPica(String allowableRecordsInput) {

@Override
public boolean isEmpty() {
return criteria.isEmpty();
return getBooleanCriteria() == null;
}

@Override
public boolean isAllowable(MarcRecord marcRecord) {
if (isEmpty())
return true;

for (CriteriumPica criterium : criteria) {
boolean passed = criterium.met(marcRecord);
if (passed)
return passed;
}
return false;
return metCriteria(marcRecord, booleanCriteria);
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package de.gwdg.metadataqa.marc.cli.utils.ignorablerecords;

import de.gwdg.metadataqa.marc.dao.MarcRecord;
import de.gwdg.metadataqa.marc.utils.parser.BooleanContainer;

import java.io.Serializable;

Expand All @@ -14,16 +15,15 @@ public RecordIgnoratorPica(String ignorableRecordsInput) {

@Override
public boolean isEmpty() {
return criteria.isEmpty();
return getBooleanCriteria() == null;
}

/**
 * Decides whether a record should be ignored.
 *
 * Without any criteria nothing is ignorable, which is what the previous list based
 * loop returned for an empty criteria list. Otherwise the decision is delegated to
 * {@code metCriteria} on the boolean criteria tree.
 *
 * @param marcRecord the record to test
 * @return {@code true} if the record matches the ignore criteria
 */
@Override
public boolean isIgnorable(MarcRecord marcRecord) {
  // no criteria: keep every record (returning true here would ignore all of them)
  if (isEmpty())
    return false;

  return metCriteria(marcRecord, booleanCriteria);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,21 @@
import java.util.ArrayList;
import java.util.List;

public class BooleanContainer {
public class BooleanContainer<T> {

public enum Op{AND, OR};
public enum Op{AND, OR}

private Op op;
private List<BooleanContainer> children = new ArrayList<>();
private Object value;
private List<BooleanContainer<T>> children = new ArrayList<>();
private T value;

public BooleanContainer() {}

public BooleanContainer(String value) {
public BooleanContainer(T value) {
this.value = value;
}

public BooleanContainer(Op op, List<BooleanContainer> children) {
public BooleanContainer(Op op, List<BooleanContainer<T>> children) {
this.op = op;
this.children = children;
}
Expand All @@ -32,14 +32,30 @@ public void setOp(Op op) {
this.op = op;
}

public List<BooleanContainer> getChildren() {
public List<BooleanContainer<T>> getChildren() {
return children;
}

public Object getValue() {
public T getValue() {
return value;
}

/**
 * Sets the value held by this container.
 *
 * @param value the new value
 */
public void setValue(T value) {
this.value = value;
}

/**
 * Counts the values stored in this container and, recursively, in all of its children.
 *
 * @return 1 if this container has a value (0 otherwise) plus the sizes of all children
 */
public int size() {
  int size = (value != null) ? 1 : 0;
  if (children != null) {
    // typed loop variable instead of the raw BooleanContainer
    for (BooleanContainer<T> child : children) {
      size += child.size();
    }
  }
  return size;
}

@Override
public String toString() {
List<String> props = new ArrayList<>();
Expand All @@ -49,6 +65,6 @@ public String toString() {
props.add("children=" + children);
if (value != null)
props.add("value='" + value + '\'');
return this.getClass().getSimpleName() + "{" + StringUtils.join(props) + '}';
return this.getClass().getSimpleName() + "{" + StringUtils.join(props, ", ") + '}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,63 +10,68 @@
*/
public class BooleanParser {

private BooleanParser() {}
String token = "";
String last = "";
int start = 0;
boolean skippedOp = false;
String input;
Deque<Integer> parens = new LinkedList<>();

public static BooleanContainer parse(String input) {
String token = "";
String last = "";
int start = 0;
boolean skippedOp = false;
BooleanContainer root = new BooleanContainer();
Deque<Integer> parens = new LinkedList<>();
private BooleanParser(String input) {
this.input = input;
}

public static BooleanContainer<String> parse(String _input) {
BooleanParser parser = new BooleanParser(_input);
return parser.parse();
}

private BooleanContainer<String> parse() {
BooleanContainer<String> root = new BooleanContainer();
for (int i = 0; i < input.length(); i++) {
String n = input.substring(i, i+1);
if (n.equals("&") && last.equals("&")) {
if (parens.isEmpty()) {
token = input.substring(start, i-1).trim();
if (root.getOp() == null) {
root.setOp(BooleanContainer.Op.AND);
}
addChild(root, token);
start = i+1;
skippedOp = false;
} else {
skippedOp = true;
}
processOp(i, root, BooleanContainer.Op.AND);
} else if (n.equals("|") && last.equals("|")) {
if (parens.isEmpty()) {
token = input.substring(start, i-1).trim();
if (root.getOp() == null) {
root.setOp(BooleanContainer.Op.OR);
}
addChild(root, token);
start = i+1;
skippedOp = false;
} else {
skippedOp = true;
}
processOp(i, root, BooleanContainer.Op.OR);
} else if (n.equals("(")) {
parens.add(i);
} else if (n.equals(")")) {
if (parens.isEmpty()) {
System.err.println("Error: closing parens without opening one: " + input);
}
if (parens.isEmpty())
throw new IllegalArgumentException("Error: closing parens without opening one: " + input);
parens.pollLast();
}
last = n;
}
token = input.substring(start).trim();
addChild(root, token);
if (!parens.isEmpty()) {
System.err.println("Error: opening parens without closing one: " + input);
}
if (!parens.isEmpty())
throw new IllegalArgumentException("Error: opening parens without closing one: " + input);
return root;
}

private static void addChild(BooleanContainer root, String token) {
/**
 * Handles a two-character operator whose second character sits at index {@code i}.
 *
 * Outside of parentheses the text between {@code start} and the operator becomes a
 * child of {@code root}, the operator is stored on {@code root} if it has none yet, and
 * {@code start} moves behind the operator. Inside parentheses the operator is only
 * noted in {@code skippedOp}, so that {@code addChild} can verify the token.
 *
 * @param i index of the operator's second character in {@code input}
 * @param root the container receiving the operand
 * @param op the operator that was recognised
 * @throws IllegalArgumentException if there is no room for an operand before the
 *         operator, e.g. three operator characters in a row
 */
private void processOp(int i, BooleanContainer<String> root, BooleanContainer.Op op) {
  if (!parens.isEmpty()) {
    skippedOp = true;
    return;
  }

  // the operator occupies i-1 and i, so the operand ends right before i-1
  int operandEnd = i - 1;
  if (operandEnd < start)
    throw new IllegalArgumentException("Error: operator without preceding operand: " + input);

  if (root.getOp() == null)
    root.setOp(op);
  addChild(root, input.substring(start, operandEnd).trim());
  start = i + 1;
  skippedOp = false;
}

private void addChild(BooleanContainer root, String token) {
if (skippedOp && !(token.startsWith("(") && token.endsWith(")")))
throw new IllegalArgumentException("internal operator with imperfect parenthes: " + input);

BooleanContainer child = (token.startsWith("(") && token.endsWith(")"))
? parse(token.substring(1, token.length()-1))
: new BooleanContainer(token);
root.getChildren().add(child);
if (child.getValue() != null && child.getOp() == null && root.getOp() == null)
root.setValue(child.getValue());
else
root.getChildren().add(child);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -326,9 +326,10 @@ public void getRecordIgnorator_pica() {
logger.log(Level.WARNING, "error in schemaType()", e);
}
assertEquals("RecordIgnoratorPica", parameters.getRecordIgnorator().getClass().getSimpleName());
assertEquals(1, ((RecordIgnoratorPica)parameters.getRecordIgnorator()).getCriteria().size());
assertEquals("CriteriumPica{[email protected], operator=NOT_MATCH, value='^L'}",
((RecordIgnoratorPica)parameters.getRecordIgnorator()).getCriteria().get(0).toString());
RecordIgnoratorPica recordIgnorator = (RecordIgnoratorPica)parameters.getRecordIgnorator();
assertNotNull(recordIgnorator.getBooleanCriteria());
assertEquals("BooleanContainer{value='CriteriumPica{[email protected], operator=NOT_MATCH, value='^L'}'}",
recordIgnorator.getBooleanCriteria().toString());
}

@Test
Expand Down Expand Up @@ -356,9 +357,10 @@ public void getRecordFilter_pica() {
logger.log(Level.WARNING, "error in schemaType()", e);
}
assertEquals("RecordFilterPica", parameters.getRecordFilter().getClass().getSimpleName());
assertEquals(1, ((RecordFilterPica)parameters.getRecordFilter()).getCriteria().size());
assertEquals("CriteriumPica{[email protected], operator=NOT_MATCH, value='^L'}",
((RecordFilterPica)parameters.getRecordFilter()).getCriteria().get(0).toString());
RecordFilterPica recordFilter = (RecordFilterPica)parameters.getRecordFilter();
assertNotNull(recordFilter.getBooleanCriteria());
assertEquals("BooleanContainer{value='CriteriumPica{[email protected], operator=NOT_MATCH, value='^L'}'}",
recordFilter.getBooleanCriteria().toString());
}

@Test
Expand Down
Loading

0 comments on commit 4cfba9d

Please sign in to comment.