Skip to content

Commit

Permalink
issue #145: parsing PICA path
Browse files Browse the repository at this point in the history
  • Loading branch information
pkiraly committed Jun 21, 2022
1 parent 1108a26 commit f1b7f3f
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,20 @@

public class PicaPath {

public enum SubfieldType {
SINGLE,
MULTI,
ALL
}


private String path;
private String tag = null;
private String xtag = null;
private Occurrence occurrence = null;
private String subfields = null;
private SubfieldType subfieldType = null;
private Subfields subfields = null;

public PicaPath(String path, String tag, String xtag, Occurrence occurrence, String subfields, SubfieldType subfieldType) {
public PicaPath(String path, String tag, String xtag, Occurrence occurrence, Subfields subfields) {
this.path = path;
this.tag = tag;
this.xtag = xtag;
this.occurrence = occurrence;
this.subfields = subfields;
this.subfieldType = subfieldType;
}

public String getPath() {
Expand All @@ -41,11 +34,7 @@ public Occurrence getOccurrence() {
return occurrence;
}

public String getSubfields() {
public Subfields getSubfields() {
return subfields;
}

public SubfieldType getSubfieldType() {
return subfieldType;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,12 @@ public class PicaPathParser {
private static final Pattern XTAG = Pattern.compile("^(2[0-9\\.][0-9\\.][A-Z@\\.]x\\d+)");
private static final Pattern OCCURENCE = Pattern.compile("^/((\\d+)-(\\d+)|(\\d{1,3})|(\\*))");
private static final Pattern SUBFIELDS = Pattern.compile("^[\\$.]?(([A-Za-z0-9]+)|(\\*))");

public static PicaPath parse(String input) {
String path = input;
String tag = null;
String xtag = null;
Occurrence occurrence = null;
String subfields = null;
PicaPath.SubfieldType subfieldType = null;
Subfields subfields = null;

String remainder = null;
Matcher m = null;
Expand Down Expand Up @@ -56,18 +54,23 @@ public static PicaPath parse(String input) {
if (remainder != null) {
m = SUBFIELDS.matcher(remainder);
if (m.find()) {
subfields = m.group(1);
String subfieldsRaw = m.group(1);
Subfields.Type subfieldType = null;
if (m.group(2) != null) {
subfieldType = m.group(2).length() == 1 ? PicaPath.SubfieldType.SINGLE : PicaPath.SubfieldType.MULTI;
} else if (m.group(3) != null)
subfieldType = PicaPath.SubfieldType.ALL;
subfieldType = m.group(2).length() == 1 ? Subfields.Type.SINGLE : Subfields.Type.MULTI;
} else if (m.group(3) != null) {
subfieldType = Subfields.Type.ALL;
}
if (subfieldType == null)
throw new IllegalArgumentException("The input does not fit to rules: " + input);
subfields = new Subfields(subfieldType, subfieldsRaw);
remainder = m.end() < remainder.length() ? remainder.substring(m.end()) : null;
}
}

if (remainder != null)
throw new IllegalArgumentException("The input does not fit to rules: " + input);

return new PicaPath(path, tag, xtag, occurrence, subfields, subfieldType);
return new PicaPath(path, tag, xtag, occurrence, subfields);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package de.gwdg.metadataqa.marc.utils.pica.path;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/**
 * The subfield part of a parsed PICA path: the raw subfield expression
 * together with the individual one-character codes it consists of.
 *
 * <p>Instances are immutable: all fields are final and the list returned by
 * {@link #getCodes()} cannot be modified.
 */
public class Subfields {

  /** How many subfields the expression addresses. */
  public enum Type {
    /** Exactly one subfield code. */
    SINGLE,
    /** More than one subfield code. */
    MULTI,
    /** The wildcard expression. */
    ALL
  }

  private final Type type;
  private final String input;
  private final List<String> codes;

  /**
   * Creates a subfield descriptor.
   *
   * @param type the kind of the expression
   * @param input the raw subfield expression; every character of it becomes one code
   * @throws NullPointerException if {@code input} is {@code null}
   */
  public Subfields(Type type, String input) {
    if (input == null) {
      throw new NullPointerException("input must not be null");
    }
    this.type = type;
    this.input = input;
    this.codes = Collections.unmodifiableList(Arrays.asList(splitIntoCodes(input)));
  }

  /**
   * Splits the expression into one-character strings.
   *
   * <p>Done by hand instead of {@code input.split("")}, because the latter returns
   * a single empty string for an empty input (and a leading empty string on
   * pre-Java-8 runtimes), which would show up as a bogus subfield code.
   */
  private static String[] splitIntoCodes(String input) {
    String[] result = new String[input.length()];
    for (int i = 0; i < result.length; i++) {
      result[i] = String.valueOf(input.charAt(i));
    }
    return result;
  }

  /**
   * @return the kind of the expression, as given to the constructor
   */
  public Type getType() {
    return type;
  }

  /**
   * @return the raw subfield expression, as given to the constructor
   */
  public String getInput() {
    return input;
  }

  /**
   * @return an unmodifiable list holding one entry per character of the input,
   *     in input order; empty if the input was empty
   */
  public List<String> getCodes() {
    return codes;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

import org.junit.Test;

import java.util.Arrays;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;

public class PicaPathParserTest {

Expand All @@ -12,6 +15,7 @@ public void tag() {
assertEquals("003@", path.getPath());
assertEquals("003@", path.getTag());
assertEquals(null, path.getXtag());
assertNull(path.getSubfields());
}

@Test
Expand All @@ -20,6 +24,7 @@ public void xtag() {
assertEquals("203@x1", path.getPath());
assertEquals(null, path.getTag());
assertEquals("203@x1", path.getXtag());
assertNull(path.getSubfields());
}

@Test(expected = IllegalArgumentException.class)
Expand All @@ -28,6 +33,7 @@ public void illegalTag() {
assertEquals("203@x1", path.getPath());
assertEquals(null, path.getTag());
assertEquals("203@x1", path.getXtag());
assertNull(path.getSubfields());
}

@Test
Expand All @@ -39,6 +45,7 @@ public void occurenceNumber() {
assertEquals(Occurrence.Type.SINGLE, path.getOccurrence().getType());
assertEquals(2, path.getOccurrence().getStart().intValue());
assertEquals(null, path.getOccurrence().getEnd());
assertNull(path.getSubfields());
}

@Test
Expand All @@ -50,6 +57,7 @@ public void occurenceRange() {
assertEquals(Occurrence.Type.RANGE, path.getOccurrence().getType());
assertEquals(2, path.getOccurrence().getStart().intValue());
assertEquals(3, path.getOccurrence().getEnd().intValue());
assertNull(path.getSubfields());
}

@Test
Expand All @@ -59,6 +67,7 @@ public void occurenceAsteriks() {
assertEquals("003@", path.getTag());
assertEquals(null, path.getXtag());
assertEquals(Occurrence.Type.ALL, path.getOccurrence().getType());
assertNull(path.getSubfields());
}

@Test
Expand All @@ -68,8 +77,9 @@ public void occurenceAsteriks_withsubfield() {
assertEquals("003@", path.getTag());
assertEquals(null, path.getXtag());
assertEquals(Occurrence.Type.ALL, path.getOccurrence().getType());
assertEquals(PicaPath.SubfieldType.ALL, path.getSubfieldType());
assertEquals("*", path.getSubfields());
assertEquals(Subfields.Type.ALL, path.getSubfields().getType());
assertEquals("*", path.getSubfields().getInput());
assertEquals(Arrays.asList("*"), path.getSubfields().getCodes());
}

@Test
Expand All @@ -79,8 +89,9 @@ public void subfieldsTag() {
assertEquals("003@", path.getTag());
assertEquals(null, path.getXtag());
assertEquals(null, path.getOccurrence());
assertEquals(PicaPath.SubfieldType.SINGLE, path.getSubfieldType());
assertEquals("a", path.getSubfields());
assertEquals(Subfields.Type.SINGLE, path.getSubfields().getType());
assertEquals("a", path.getSubfields().getInput());
assertEquals(Arrays.asList("a"), path.getSubfields().getCodes());
}

@Test
Expand All @@ -90,8 +101,9 @@ public void subfieldsTag_multiple() {
assertEquals("003@", path.getTag());
assertEquals(null, path.getXtag());
assertEquals(null, path.getOccurrence());
assertEquals(PicaPath.SubfieldType.MULTI, path.getSubfieldType());
assertEquals("abc", path.getSubfields());
assertEquals(Subfields.Type.MULTI, path.getSubfields().getType());
assertEquals("abc", path.getSubfields().getInput());
assertEquals(Arrays.asList("a", "b", "c"), path.getSubfields().getCodes());
}

@Test
Expand All @@ -101,8 +113,9 @@ public void subfieldsTag_multiple_mixed() {
assertEquals("003@", path.getTag());
assertEquals(null, path.getXtag());
assertEquals(null, path.getOccurrence());
assertEquals(PicaPath.SubfieldType.MULTI, path.getSubfieldType());
assertEquals("aBc", path.getSubfields());
assertEquals(Subfields.Type.MULTI, path.getSubfields().getType());
assertEquals("aBc", path.getSubfields().getInput());
assertEquals(Arrays.asList("a", "B", "c"), path.getSubfields().getCodes());
}

@Test
Expand All @@ -112,8 +125,9 @@ public void subfieldsTag_asteriks() {
assertEquals("003@", path.getTag());
assertEquals(null, path.getXtag());
assertEquals(null, path.getOccurrence());
assertEquals(PicaPath.SubfieldType.ALL, path.getSubfieldType());
assertEquals("*", path.getSubfields());
assertEquals(Subfields.Type.ALL, path.getSubfields().getType());
assertEquals("*", path.getSubfields().getInput());
assertEquals(Arrays.asList("*"), path.getSubfields().getCodes());
}

@Test
Expand All @@ -123,7 +137,8 @@ public void subfieldsTag_asteriks_nodollar() {
assertEquals("003@", path.getTag());
assertEquals(null, path.getXtag());
assertEquals(null, path.getOccurrence());
assertEquals(PicaPath.SubfieldType.ALL, path.getSubfieldType());
assertEquals("*", path.getSubfields());
assertEquals(Subfields.Type.ALL, path.getSubfields().getType());
assertEquals("*", path.getSubfields().getInput());
assertEquals(Arrays.asList("*"), path.getSubfields().getCodes());
}
}

0 comments on commit f1b7f3f

Please sign in to comment.