Skip to content

Commit

Permalink
Implement codelist and flags for the Unimarc Avram schema reader. #405
Browse files Browse the repository at this point in the history
  • Loading branch information
pkiraly committed Jan 20, 2024
1 parent 62b099b commit a3d0445
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
Expand All @@ -35,6 +36,7 @@ public class UnimarcSchemaReader {
private static final String SUBFIELDS = "subfields";
private static final String POSITIONS = "positions";
private static final String CODES = "codes";
private static final String FLAGS = "flags";
private static final String CODELIST = "codelist";
private static final String START = "start";
private static final String END = "end";
Expand All @@ -52,18 +54,21 @@ public class UnimarcSchemaReader {
LABEL, 1,
REPEATABLE, 1,
CODELIST, 1,
POSITIONS, 1);
POSITIONS, 1,
CODES, 1);
private static final Map<String, Integer> knownIndicatorProperties = Map.of(
LABEL, 1,
CODES,1);

// JSON parser used for reading the schema document.
private final JSONParser parser = new JSONParser(JSONParser.MODE_RFC4627);
// Schema instance that createSchema(...) returns.
private final UnimarcSchemaManager schema = new UnimarcSchemaManager();
// Named code lists read from the schema's "codelists" section, keyed by list name.
// The map is only ever mutated in place, so the reference itself is final.
private final Map<String, List<EncodedValue>> codeLists = new HashMap<>();

public UnimarcSchemaManager createSchema(InputStream inputStream) {
try {
JSONObject obj = readFile(inputStream);
processFields(obj);
JSONObject jsonObject = readStream(inputStream);
processCodeLists(jsonObject);
processFields(jsonObject);
} catch (ParseException e) {
logger.severe(e.getLocalizedMessage());
}
Expand All @@ -73,21 +78,32 @@ public UnimarcSchemaManager createSchema(InputStream inputStream) {

public UnimarcSchemaManager createSchema(String filename) {
try {
JSONObject obj = readFile(filename);
processFields(obj);
JSONObject jsonObject = readFile(filename);
processCodeLists(jsonObject);
processFields(jsonObject);
} catch (FileNotFoundException | ParseException e) {
logger.severe(e.getLocalizedMessage());
}

return schema;
}

/**
 * Registers every named code list found under the top-level "codelists" key.
 *
 * <p>Each entry is expected to be an object holding a "codes" object; its codes are
 * converted with {@code processCodes} and stored in {@code codeLists} under the entry's name.
 * A schema without a "codelists" section is accepted and registers nothing. An entry
 * without a usable "codes" object is reported and skipped instead of failing with a
 * {@link NullPointerException} or {@link ClassCastException}.
 *
 * @param jsonObject the root object of the parsed schema
 */
private void processCodeLists(JSONObject jsonObject) {
  Object codeListsValue = jsonObject.get("codelists");
  if (!(codeListsValue instanceof JSONObject)) {
    // The section is optional: nothing to register when it is absent or not an object.
    return;
  }

  for (Map.Entry<String, Object> entry : ((JSONObject) codeListsValue).entrySet()) {
    String codeListName = entry.getKey();
    Object propertiesValue = entry.getValue();
    Object codesValue = propertiesValue instanceof JSONObject
      ? ((JSONObject) propertiesValue).get("codes")
      : null;

    if (!(codesValue instanceof JSONObject)) {
      logger.warning(() -> String.format("codelist '%s' has no codes object, skipping it", codeListName));
      continue;
    }

    codeLists.put(codeListName, processCodes((JSONObject) codesValue));
  }
}

/**
 * Parses the JSON schema file at the given path.
 *
 * <p>The file is decoded explicitly as UTF-8 rather than with the platform default
 * charset, and the underlying reader is always closed once parsing has finished.
 *
 * @param filename path of the schema file to read
 * @return the root JSON object of the file
 * @throws FileNotFoundException if the file cannot be opened for reading
 * @throws ParseException if the content is not valid JSON
 */
private JSONObject readFile(String filename) throws FileNotFoundException, ParseException {
  InputStreamReader reader = new InputStreamReader(
    new java.io.FileInputStream(filename), StandardCharsets.UTF_8);
  try {
    return (JSONObject) parser.parse(reader);
  } finally {
    try {
      reader.close();
    } catch (java.io.IOException ignored) {
      // Closing a read-only stream after parsing cannot lose data; a failure here
      // must not mask the parse result or a ParseException already in flight.
    }
  }
}

private JSONObject readFile(InputStream stream) throws ParseException {
private JSONObject readStream(InputStream stream) throws ParseException {
InputStreamReader streamReader = new InputStreamReader(stream, StandardCharsets.UTF_8);
return (JSONObject) parser.parse(streamReader);
}
Expand Down Expand Up @@ -210,6 +226,10 @@ private List<SubfieldDefinition> getSubfields(JSONObject jsonField, String paren
codeList.setCodes(codes);
subfieldDefinition.setCodeList(codeList);

codes = getCodes(jsonSubfield, CODES);
if (!codes.isEmpty())
subfieldDefinition.setCodes(codes);

String subfieldTag = String.format("%s$%s", parentTag, code);

List<ControlfieldPositionDefinition> positions = getPositions(jsonSubfield, subfieldTag);
Expand All @@ -224,7 +244,7 @@ private List<SubfieldDefinition> getSubfields(JSONObject jsonField, String paren
// Log all unhandled subfield properties
for (String property : jsonSubfield.keySet()) {
if (!knownSubfieldProperties.containsKey(property)) {
logger.warning(() -> "unhandled subfield property: " + property);
logger.warning(() -> String.format("%s$%s unhandled subfield property: %s", parentTag, code, property));
}
}
}
Expand Down Expand Up @@ -262,7 +282,12 @@ private List<ControlfieldPositionDefinition> getPositions(JSONObject positionPar
positionDefinition.setId(positionId);

List<EncodedValue> codes = getCodes(position, CODES);
positionDefinition.setCodes(codes);
if (!codes.isEmpty())
positionDefinition.setCodes(codes);

codes = getCodes(position, FLAGS);
if (!codes.isEmpty())
positionDefinition.setCodes(codes);

// Get first length of the codes
int codeLength = codes.stream().findFirst().map(EncodedValue::getCode).map(String::length).orElse(0);
Expand Down Expand Up @@ -327,14 +352,23 @@ private void checkPositionPlaces(String key, int positionStart, Object positionE
* @return The list of codes for the respective codesHolder
*/
private List<EncodedValue> getCodes(JSONObject codesHolder, String objectKey) {
JSONObject codes = (JSONObject) codesHolder.get(objectKey);
Object listValue = codesHolder.get(objectKey);
if (listValue instanceof String) {
return codeLists.computeIfAbsent((String) listValue, s -> List.of());
}

JSONObject codes = (JSONObject) listValue;
if (codes == null) {
return List.of();
}
return processCodes(codes);
}

private List<EncodedValue> processCodes(JSONObject codes) {
List<EncodedValue> encodedValues = new ArrayList<>();
for (Map.Entry<String, Object> codeEntry : codes.entrySet()) {
String code = codeEntry.getKey();
String codeLabel = (String) codes.get(code);
String codeLabel = (String) codeEntry.getValue();

if (code.startsWith("//")) {
continue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import de.gwdg.metadataqa.marc.EncodedValue;
import de.gwdg.metadataqa.marc.definition.general.codelist.CodeList;
import de.gwdg.metadataqa.marc.definition.structure.ControlfieldPositionDefinition;
import de.gwdg.metadataqa.marc.definition.structure.Indicator;
import de.gwdg.metadataqa.marc.definition.structure.SubfieldDefinition;
import org.junit.Before;
Expand Down Expand Up @@ -94,7 +95,6 @@ public void createSchema_indicatorStructureIsCorrect() {
@Test
public void createSchema_subfieldStructureIsCorrect() {
int expectedSubfieldCount = 3;
int expectedSubfield2CodeListSize = 56;
int expected100aPositionCount = 14;
int expected100aPosition1CodeCount = 12;

Expand All @@ -111,14 +111,6 @@ public void createSchema_subfieldStructureIsCorrect() {
assertNotNull(subfieldB);
assertTrue(subfieldA.isRepeatable());

SubfieldDefinition subfield2 = subfieldDefinitions.get("2");
assertNotNull(subfield2);
assertFalse(subfield2.isRepeatable());

CodeList codeList = subfield2.getCodeList();
assertNotNull(codeList);
assertEquals(expectedSubfield2CodeListSize, codeList.getCodes().size());

UnimarcFieldDefinition field100 = schema.lookup("100");
Map<String, SubfieldDefinition> subfieldDefinitions100 = field100.getSubfieldDefinitions();
SubfieldDefinition subfield100a = subfieldDefinitions100.get("a");
Expand All @@ -129,6 +121,42 @@ public void createSchema_subfieldStructureIsCorrect() {
List<EncodedValue> codes = subfield100a.getPosition(8, 9).getCodes();
assertEquals(expected100aPosition1CodeCount, codes.size());
}

/**
 * Checks subfield $2 of field 886: it must exist, be non-repeatable,
 * and expose a non-null list of 56 codes.
 */
@Test
public void createSchema_subfieldStructureIsCorrect_886_2() {
  UnimarcFieldDefinition field886 = schema.lookup("886");
  SubfieldDefinition sourceSubfield = field886.getSubfieldDefinitions().get("2");

  assertNotNull(sourceSubfield);
  assertFalse(sourceSubfield.isRepeatable());

  List<EncodedValue> resolvedCodes = sourceSubfield.getCodes();
  assertNotNull(resolvedCodes);

  int actualCodeCount = resolvedCodes.size();
  assertEquals(56, actualCodeCount);
}

/**
 * Checks subfield $a of field 100: it must exist, be non-repeatable and define
 * 14 positions. The position spanning 17-20 must have a unit length of 1,
 * repeatable content and a non-null code list.
 */
@Test
public void createSchema_subfieldStructureIsCorrect_100a() {
  UnimarcFieldDefinition field100 = schema.lookup("100");
  Map<String, SubfieldDefinition> subfieldDefinitions = field100.getSubfieldDefinitions();

  SubfieldDefinition subfield = subfieldDefinitions.get("a");
  assertNotNull(subfield);
  assertFalse(subfield.isRepeatable());

  List<ControlfieldPositionDefinition> positions = subfield.getPositions();
  assertNotNull(positions);
  assertEquals(14, positions.size());

  // No debug printing here: the assertions below describe the expected state.
  ControlfieldPositionDefinition position = subfield.getPosition(17, 20);
  assertNotNull(position);
  assertEquals(1, position.getUnitLength());
  assertTrue(position.isRepeatableContent());
  assertNotNull(position.getCodes());
}

/**
 * Turns the name of a file under {@code src/test/resources/} into an absolute path string.
 *
 * @param filename file name relative to the test resources directory
 * @return the absolute path of that file as a string
 */
private String getPath(String filename) {
  String relativeLocation = "src/test/resources/" + filename;
  return Paths.get(relativeLocation)
    .toAbsolutePath()
    .toString();
}
Expand Down

0 comments on commit a3d0445

Please sign in to comment.