diff --git a/src/main/java/de/gwdg/metadataqa/marc/cli/utils/ignorablerecords/PicaFilter.java b/src/main/java/de/gwdg/metadataqa/marc/cli/utils/ignorablerecords/PicaFilter.java index 3b07200a9..2091144fe 100644 --- a/src/main/java/de/gwdg/metadataqa/marc/cli/utils/ignorablerecords/PicaFilter.java +++ b/src/main/java/de/gwdg/metadataqa/marc/cli/utils/ignorablerecords/PicaFilter.java @@ -1,7 +1,11 @@ package de.gwdg.metadataqa.marc.cli.utils.ignorablerecords; +import de.gwdg.metadataqa.marc.dao.MarcRecord; +import de.gwdg.metadataqa.marc.utils.parser.BooleanContainer; +import de.gwdg.metadataqa.marc.utils.parser.BooleanParser; import de.gwdg.metadataqa.marc.utils.pica.path.PicaPath; import de.gwdg.metadataqa.marc.utils.pica.path.PicaPathParser; +import org.apache.commons.lang3.StringUtils; import java.util.ArrayList; import java.util.List; @@ -12,13 +16,26 @@ public class PicaFilter { protected static final Pattern CRITERIUM = Pattern.compile("^([012\\.][A-Za-z0-9@\\./\\$\\*\\-]+?)(\\s*(==|!=|=~|!~|=\\^|=\\$)\\s*'([^']+)'|\\?)$"); protected List criteria = new ArrayList<>(); + protected BooleanContainer booleanCriteria; protected void parse(String ignorableRecordsInput) { - String[] rawCriteria = ignorableRecordsInput.split(","); - for (String rawCriterium : rawCriteria) { - if (!rawCriterium.isEmpty()) - criteria.add(parseCriterium(rawCriterium)); + + if (StringUtils.isNotBlank(ignorableRecordsInput)) { /* only non-blank input is parsed; otherwise booleanCriteria is not assigned here */ + booleanCriteria = transformContainer(BooleanParser.parse(ignorableRecordsInput)); + } + } + + private BooleanContainer transformContainer(BooleanContainer booleanCriteria) { /* Rebuilds the parsed tree node by node: copies the operator, converts a leaf's raw value with parseCriterium(), and recurses into the children. */ + BooleanContainer container = new BooleanContainer(); + container.setOp(booleanCriteria.getOp()); + if (booleanCriteria.getValue() != null) { + container.setValue(parseCriterium(booleanCriteria.getValue())); + } else if (!booleanCriteria.getChildren().isEmpty()) { + for (BooleanContainer child : booleanCriteria.getChildren()) { + container.getChildren().add(transformContainer(child)); + } } + 
return container; } protected CriteriumPica parseCriterium(String rawCriterium) { @@ -45,10 +62,42 @@ public List getCriteria() { return criteria; } + public BooleanContainer getBooleanCriteria() { + return booleanCriteria; + } + + public boolean metCriteria(MarcRecord marcRecord, BooleanContainer criteria) { /* Evaluates the criteria tree against the record: a leaf delegates to its criterium's met(), an inner node combines its children with OR, or with AND otherwise, stopping at the first child that decides the result. */ + boolean passed = false; + if (criteria.getValue() != null) { + passed = criteria.getValue().met(marcRecord); + } else { + boolean hasPassed = false; + boolean hasFailed = false; + for (BooleanContainer container : criteria.getChildren()) { + boolean p = metCriteria(marcRecord, container); + if (p && !hasPassed) + hasPassed = true; + if (!p && !hasFailed) + hasFailed = true; + if (criteria.getOp() != BooleanContainer.Op.OR && !p) { /* AND, or no operator: the first failure decides; == and != are null-safe, unlike getOp().equals(...) */ + break; + } + if (criteria.getOp() == BooleanContainer.Op.OR && p) { + break; + } + } + if (criteria.getOp() == BooleanContainer.Op.OR) + passed = hasPassed; + else /* AND, or a node without operator such as the wrapper around a single parenthesised group; this used to throw NullPointerException */ + passed = hasPassed && !hasFailed; + } + return passed; + } + @Override public String toString() { return "PicaFilter{" + - "criteria=" + criteria + + "criteria=" + booleanCriteria + '}'; } } diff --git a/src/main/java/de/gwdg/metadataqa/marc/cli/utils/ignorablerecords/RecordFilterPica.java b/src/main/java/de/gwdg/metadataqa/marc/cli/utils/ignorablerecords/RecordFilterPica.java index 4a1d49df1..b31cccf9d 100644 --- a/src/main/java/de/gwdg/metadataqa/marc/cli/utils/ignorablerecords/RecordFilterPica.java +++ b/src/main/java/de/gwdg/metadataqa/marc/cli/utils/ignorablerecords/RecordFilterPica.java @@ -13,7 +13,7 @@ public RecordFilterPica(String allowableRecordsInput) { @Override public boolean isEmpty() { - return criteria.isEmpty(); + return getBooleanCriteria() == null; } @Override @@ -21,11 +21,6 @@ public boolean isAllowable(MarcRecord marcRecord) { if (isEmpty()) return true; - for (CriteriumPica criterium : criteria) { - boolean passed = criterium.met(marcRecord); - if (passed) - return passed; - 
} - return false; + return metCriteria(marcRecord, booleanCriteria); } } diff --git a/src/main/java/de/gwdg/metadataqa/marc/cli/utils/ignorablerecords/RecordIgnoratorPica.java b/src/main/java/de/gwdg/metadataqa/marc/cli/utils/ignorablerecords/RecordIgnoratorPica.java index 7cbb0651a..5e22ac3f0 100644 --- a/src/main/java/de/gwdg/metadataqa/marc/cli/utils/ignorablerecords/RecordIgnoratorPica.java +++ b/src/main/java/de/gwdg/metadataqa/marc/cli/utils/ignorablerecords/RecordIgnoratorPica.java @@ -1,6 +1,7 @@ package de.gwdg.metadataqa.marc.cli.utils.ignorablerecords; import de.gwdg.metadataqa.marc.dao.MarcRecord; +import de.gwdg.metadataqa.marc.utils.parser.BooleanContainer; import java.io.Serializable; @@ -14,16 +15,15 @@ public RecordIgnoratorPica(String ignorableRecordsInput) { @Override public boolean isEmpty() { - return criteria.isEmpty(); + return getBooleanCriteria() == null; } @Override public boolean isIgnorable(MarcRecord marcRecord) { - for (CriteriumPica criterium : criteria) { - boolean passed = criterium.met(marcRecord); - if (passed) - return passed; - } - return false; + if (isEmpty()) + return false; /* no criteria configured: nothing is ignorable, matching the removed loop, which returned false for an empty list */ + + return metCriteria(marcRecord, booleanCriteria); } + } diff --git a/src/main/java/de/gwdg/metadataqa/marc/utils/parser/BooleanContainer.java b/src/main/java/de/gwdg/metadataqa/marc/utils/parser/BooleanContainer.java index dcbb6d9d0..255c20af2 100644 --- a/src/main/java/de/gwdg/metadataqa/marc/utils/parser/BooleanContainer.java +++ b/src/main/java/de/gwdg/metadataqa/marc/utils/parser/BooleanContainer.java @@ -5,21 +5,21 @@ import java.util.ArrayList; import java.util.List; -public class BooleanContainer { +public class BooleanContainer { - public enum Op{AND, OR}; + public enum Op{AND, OR} private Op op; - private List children = new ArrayList<>(); - private Object value; + private List> children = new ArrayList<>(); + private T value; public BooleanContainer() {} - public BooleanContainer(String value) { + public BooleanContainer(T value) { this.value 
= value; } - public BooleanContainer(Op op, List children) { + public BooleanContainer(Op op, List> children) { this.op = op; this.children = children; } @@ -32,14 +32,30 @@ public void setOp(Op op) { this.op = op; } - public List getChildren() { + public List> getChildren() { return children; } - public Object getValue() { + public T getValue() { return value; } + public void setValue(T value) { + this.value = value; + } + + public int size() { /* Counts the non-null values held by this node and, recursively, by all of its children. */ + int size = 0; + if (value != null) + size++; + if (children != null) { + for (BooleanContainer child : children) { + size += child.size(); + } + } + return size; + } + @Override public String toString() { List props = new ArrayList<>(); @@ -49,6 +65,6 @@ public String toString() { props.add("children=" + children); if (value != null) props.add("value='" + value + '\''); - return this.getClass().getSimpleName() + "{" + StringUtils.join(props) + '}'; + return this.getClass().getSimpleName() + "{" + StringUtils.join(props, ", ") + '}'; } } diff --git a/src/main/java/de/gwdg/metadataqa/marc/utils/parser/BooleanParser.java b/src/main/java/de/gwdg/metadataqa/marc/utils/parser/BooleanParser.java index 2a62d3a0f..9917ee3e2 100644 --- a/src/main/java/de/gwdg/metadataqa/marc/utils/parser/BooleanParser.java +++ b/src/main/java/de/gwdg/metadataqa/marc/utils/parser/BooleanParser.java @@ -10,63 +10,68 @@ */ public class BooleanParser { - private BooleanParser() {} + String token = ""; + String last = ""; + int start = 0; + boolean skippedOp = false; + String input; + Deque parens = new LinkedList<>(); - public static BooleanContainer parse(String input) { - String token = ""; - String last = ""; - int start = 0; - boolean skippedOp = false; - BooleanContainer root = new BooleanContainer(); - Deque parens = new LinkedList<>(); + private BooleanParser(String input) { + this.input = input; + } + + public static BooleanContainer parse(String _input) { /* Static entry point: every call runs a fresh parser instance, so the scanning state above is per input. */ + BooleanParser parser = new BooleanParser(_input); + return parser.parse(); + } + + 
private BooleanContainer parse() { + BooleanContainer root = new BooleanContainer(); for (int i = 0; i < input.length(); i++) { String n = input.substring(i, i+1); if (n.equals("&") && last.equals("&")) { - if (parens.isEmpty()) { - token = input.substring(start, i-1).trim(); - if (root.getOp() == null) { - root.setOp(BooleanContainer.Op.AND); - } - addChild(root, token); - start = i+1; - skippedOp = false; - } else { - skippedOp = true; - } + processOp(i, root, BooleanContainer.Op.AND); } else if (n.equals("|") && last.equals("|")) { - if (parens.isEmpty()) { - token = input.substring(start, i-1).trim(); - if (root.getOp() == null) { - root.setOp(BooleanContainer.Op.OR); - } - addChild(root, token); - start = i+1; - skippedOp = false; - } else { - skippedOp = true; - } + processOp(i, root, BooleanContainer.Op.OR); } else if (n.equals("(")) { parens.add(i); } else if (n.equals(")")) { - if (parens.isEmpty()) { - System.err.println("Error: closing parens without opening one: " + input); - } + if (parens.isEmpty()) + throw new IllegalArgumentException("Error: closing parens without opening one: " + input); parens.pollLast(); } last = n; } token = input.substring(start).trim(); addChild(root, token); - if (!parens.isEmpty()) { - System.err.println("Error: opening parens without closing one: " + input); - } + if (!parens.isEmpty()) + throw new IllegalArgumentException("Error: opening parens without closing one: " + input); return root; } - private static void addChild(BooleanContainer root, String token) { + private void processOp(int i, BooleanContainer root, BooleanContainer.Op and) { + if (parens.isEmpty()) { + if (root.getOp() == null) + root.setOp(and); + addChild(root, input.substring(start, i -1).trim()); + start = i +1; + skippedOp = false; + } else { + skippedOp = true; + } + } + + private void addChild(BooleanContainer root, String token) { + if (skippedOp && !(token.startsWith("(") && token.endsWith(")"))) + throw new IllegalArgumentException("internal 
operator with imperfect parenthes: " + input); + BooleanContainer child = (token.startsWith("(") && token.endsWith(")")) ? parse(token.substring(1, token.length()-1)) : new BooleanContainer(token); - root.getChildren().add(child); + if (child.getValue() != null && child.getOp() == null && root.getOp() == null) + root.setValue(child.getValue()); + else + root.getChildren().add(child); } } diff --git a/src/test/java/de/gwdg/metadataqa/marc/cli/parameters/CommonParametersTest.java b/src/test/java/de/gwdg/metadataqa/marc/cli/parameters/CommonParametersTest.java index 11681d81c..62cb11190 100644 --- a/src/test/java/de/gwdg/metadataqa/marc/cli/parameters/CommonParametersTest.java +++ b/src/test/java/de/gwdg/metadataqa/marc/cli/parameters/CommonParametersTest.java @@ -326,9 +326,10 @@ public void getRecordIgnorator_pica() { logger.log(Level.WARNING, "error in schemaType()", e); } assertEquals("RecordIgnoratorPica", parameters.getRecordIgnorator().getClass().getSimpleName()); - assertEquals(1, ((RecordIgnoratorPica)parameters.getRecordIgnorator()).getCriteria().size()); - assertEquals("CriteriumPica{path=002@.0, operator=NOT_MATCH, value='^L'}", - ((RecordIgnoratorPica)parameters.getRecordIgnorator()).getCriteria().get(0).toString()); + RecordIgnoratorPica recordIgnorator = (RecordIgnoratorPica)parameters.getRecordIgnorator(); + assertNotNull(recordIgnorator.getBooleanCriteria()); + assertEquals("BooleanContainer{value='CriteriumPica{path=002@.0, operator=NOT_MATCH, value='^L'}'}", + recordIgnorator.getBooleanCriteria().toString()); } @Test @@ -356,9 +357,10 @@ public void getRecordFilter_pica() { logger.log(Level.WARNING, "error in schemaType()", e); } assertEquals("RecordFilterPica", parameters.getRecordFilter().getClass().getSimpleName()); - assertEquals(1, ((RecordFilterPica)parameters.getRecordFilter()).getCriteria().size()); - assertEquals("CriteriumPica{path=002@.0, operator=NOT_MATCH, value='^L'}", - 
((RecordFilterPica)parameters.getRecordFilter()).getCriteria().get(0).toString()); + RecordFilterPica recordFilter = (RecordFilterPica)parameters.getRecordFilter(); + assertNotNull(recordFilter.getBooleanCriteria()); + assertEquals("BooleanContainer{value='CriteriumPica{path=002@.0, operator=NOT_MATCH, value='^L'}'}", + recordFilter.getBooleanCriteria().toString()); } @Test diff --git a/src/test/java/de/gwdg/metadataqa/marc/cli/utils/ignorablerecords/RecordIgnoratorPicaTest.java b/src/test/java/de/gwdg/metadataqa/marc/cli/utils/ignorablerecords/RecordIgnoratorPicaTest.java index 6d06ec517..68b49beb3 100644 --- a/src/test/java/de/gwdg/metadataqa/marc/cli/utils/ignorablerecords/RecordIgnoratorPicaTest.java +++ b/src/test/java/de/gwdg/metadataqa/marc/cli/utils/ignorablerecords/RecordIgnoratorPicaTest.java @@ -2,6 +2,7 @@ import de.gwdg.metadataqa.marc.dao.DataField; import de.gwdg.metadataqa.marc.dao.MarcRecord; +import de.gwdg.metadataqa.marc.utils.parser.BooleanContainer; import de.gwdg.metadataqa.marc.utils.pica.PicaFieldDefinition; import de.gwdg.metadataqa.marc.utils.pica.PicaSchemaReader; import org.junit.Test; @@ -39,18 +40,20 @@ public void parse_ex4() { @Test public void parse_ex5() { - RecordIgnorator ignorator = new RecordIgnoratorPica("002@.0 !~ '^L',002@.0 !~ '^..[iktN]',002@.0 !~ '^.v',021A.a?"); + RecordIgnorator ignorator = new RecordIgnoratorPica("002@.0 !~ '^L' && 002@.0 !~ '^..[iktN]' && (002@.0 !~ '^.v' || 021A.a?)"); assertFalse(ignorator.isEmpty()); - List criteria = ((RecordIgnoratorPica)ignorator).getCriteria(); - assertEquals(4, criteria.size()); + BooleanContainer container = ((RecordIgnoratorPica)ignorator).getBooleanCriteria(); + assertEquals(4, container.size()); /* four criteria in total: two top-level ones plus the two inside the parenthesised group */ + CriteriumPica criteria = container.getChildren().get(0).getValue(); - assertEquals("002@.0", criteria.get(0).getPath().getPath()); - assertEquals(Operator.NOT_MATCH, criteria.get(0).getOperator()); - assertEquals("^L", criteria.get(0).getValue()); + assertEquals("002@.0", 
criteria.getPath().getPath()); + assertEquals(Operator.NOT_MATCH, criteria.getOperator()); + assertEquals("^L", criteria.getValue()); - assertEquals("021A.a", criteria.get(3).getPath().getPath()); - assertEquals(Operator.EXIST, criteria.get(3).getOperator()); - assertEquals(null, criteria.get(3).getValue()); + criteria = (CriteriumPica) container.getChildren().get(2).getChildren().get(1).getValue(); + assertEquals("021A.a", criteria.getPath().getPath()); + assertEquals(Operator.EXIST, criteria.getOperator()); + assertEquals(null, criteria.getValue()); } @Test @@ -134,18 +137,25 @@ public void isIgnorable_exists_not() { isIgnorableFailing("pica", "002@.b?"); } + @Test + public void parse_boolean() { /* NOTE(review): no assertions; this only checks that constructing the ignorator does not throw */ + RecordIgnorator ignorator = new RecordIgnoratorPica("002@.0 !~ '^L' && 002@.0 !~ '^..[iktN]' && (002@.0 !~ '^v' || 021A.a?)"); + } + private String getPath(String fileName) { return Paths.get("src/test/resources/" + fileName).toAbsolutePath().toString(); } - private void testParsing(String ignorableRecordsInput, int expected, String expected1, Operator notMatch, String expected2) { + private void testParsing(String ignorableRecordsInput, int size, String path, Operator op, String value) { /* NOTE(review): the size parameter is no longer checked; the assertion that used it is commented out below */ RecordIgnorator ignorator = new RecordIgnoratorPica(ignorableRecordsInput); assertFalse(ignorator.isEmpty()); - List criteria = ((RecordIgnoratorPica)ignorator).getCriteria(); - assertEquals(expected, criteria.size()); - assertEquals(expected1, criteria.get(0).getPath().getPath()); - assertEquals(notMatch, criteria.get(0).getOperator()); - assertEquals(expected2, criteria.get(0).getValue()); + BooleanContainer criteria = ((RecordIgnoratorPica)ignorator).getBooleanCriteria(); + // List criteria = ((RecordIgnoratorPica)ignorator).getCriteria(); + // assertEquals(size, criteria.size()); + assertEquals("CriteriumPica", criteria.getValue().getClass().getSimpleName()); + assertEquals(path, ((CriteriumPica)criteria.getValue()).getPath().getPath()); + assertEquals(op, 
((CriteriumPica)criteria.getValue()).getOperator()); + assertEquals(value, ((CriteriumPica)criteria.getValue()).getValue()); } private void isIgnorable(String abk, String ignorableRecordsInput) { diff --git a/src/test/java/de/gwdg/metadataqa/marc/utils/parser/BooleanParserTest.java b/src/test/java/de/gwdg/metadataqa/marc/utils/parser/BooleanParserTest.java index 55957b21f..5f63d7185 100644 --- a/src/test/java/de/gwdg/metadataqa/marc/utils/parser/BooleanParserTest.java +++ b/src/test/java/de/gwdg/metadataqa/marc/utils/parser/BooleanParserTest.java @@ -10,7 +10,7 @@ public class BooleanParserTest { @Test public void parse_ex1() { String input = "002@.0 !~ '^L' && 002@.0 !~ '^..[iktN]' && (002@.0 !~ '^.v' || 021A.a?)"; - BooleanContainer root = BooleanParser.parse(input); + BooleanContainer root = BooleanParser.parse(input); assertNotNull(root); assertEquals(BooleanContainer.Op.AND, root.getOp()); assertEquals(null, root.getValue()); @@ -25,9 +25,18 @@ public void parse_ex1() { assertEquals("002@.0 !~ '^.v'", root.getChildren().get(2).getChildren().get(0).getValue()); assertEquals(null, root.getChildren().get(2).getChildren().get(1).getOp()); assertEquals("021A.a?", root.getChildren().get(2).getChildren().get(1).getValue()); - assertEquals("BooleanContainer{[op=AND, children=[BooleanContainer{[value='002@.0 !~ '^L'']}, BooleanContainer{[value='002@.0 !~ '^..[iktN]'']}, BooleanContainer{[op=OR, children=[BooleanContainer{[value='002@.0 !~ '^.v'']}, BooleanContainer{[value='021A.a?']}]]}]]}", + assertEquals("BooleanContainer{op=AND, children=[BooleanContainer{value='002@.0 !~ '^L''}, BooleanContainer{value='002@.0 !~ '^..[iktN]''}, BooleanContainer{op=OR, children=[BooleanContainer{value='002@.0 !~ '^.v''}, BooleanContainer{value='021A.a?'}]}]}", root.toString()); + assertEquals(4, root.size()); } - + @Test + public void parse_ex2() { /* A single criterium without any operator is expected to end up as the root's own value, not as a child. */ + String input = "002@.0 !~ '^L'"; + BooleanContainer root = BooleanParser.parse(input); + assertNotNull(root); + 
assertEquals("BooleanContainer{value='002@.0 !~ '^L''}", + root.toString()); + assertEquals(1, root.size()); + } }