Skip to content

Commit

Permalink
Merge branch 'gegic-unimarc-schema-parsing'
Browse files (browse the repository at this point in the history)
  • Loading branch information
pkiraly committed Dec 18, 2023
2 parents 7fc279a + 75cd5f7 commit c1d7679
Show file tree
Hide file tree
Showing 54 changed files with 99,683 additions and 416 deletions.
29 changes: 14 additions & 15 deletions src/main/java/de/gwdg/metadataqa/marc/MarcFactory.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package de.gwdg.metadataqa.marc;

import de.gwdg.metadataqa.api.json.DataElement;
import de.gwdg.metadataqa.api.model.selector.JsonSelector;
import de.gwdg.metadataqa.api.model.XmlFieldInstance;
import de.gwdg.metadataqa.api.model.selector.JsonSelector;
import de.gwdg.metadataqa.api.schema.MarcJsonSchema;
import de.gwdg.metadataqa.api.schema.Schema;
import de.gwdg.metadataqa.marc.cli.utils.IteratorResponse;
Expand All @@ -14,30 +14,29 @@
import de.gwdg.metadataqa.marc.dao.Control008;
import de.gwdg.metadataqa.marc.dao.DataField;
import de.gwdg.metadataqa.marc.dao.DefaultMarcPositionalControlField;
import de.gwdg.metadataqa.marc.dao.Leader;
import de.gwdg.metadataqa.marc.dao.Marc21Leader;
import de.gwdg.metadataqa.marc.dao.MarcLeader;
import de.gwdg.metadataqa.marc.dao.SimpleControlField;
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;
import de.gwdg.metadataqa.marc.dao.record.Marc21AuthorityRecord;
import de.gwdg.metadataqa.marc.dao.record.Marc21BibliographicRecord;
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;
import de.gwdg.metadataqa.marc.dao.record.Marc21Record;
import de.gwdg.metadataqa.marc.dao.record.PicaRecord;
import de.gwdg.metadataqa.marc.definition.structure.DataFieldDefinition;
import de.gwdg.metadataqa.marc.definition.MarcVersion;
import de.gwdg.metadataqa.marc.definition.TagDefinitionLoader;
import de.gwdg.metadataqa.marc.definition.structure.DataFieldDefinition;
import de.gwdg.metadataqa.marc.definition.structure.DefaultControlFieldDefinition;
import de.gwdg.metadataqa.marc.definition.structure.SubfieldDefinition;
import de.gwdg.metadataqa.marc.definition.TagDefinitionLoader;

import de.gwdg.metadataqa.marc.utils.alephseq.AlephseqLine;
import de.gwdg.metadataqa.marc.utils.MapToDatafield;

import de.gwdg.metadataqa.marc.utils.alephseq.AlephseqLine;
import de.gwdg.metadataqa.marc.utils.alephseq.MarcMakerLine;
import de.gwdg.metadataqa.marc.utils.alephseq.MarclineLine;
import de.gwdg.metadataqa.marc.utils.marcreader.schema.Marc21SchemaManager;
import de.gwdg.metadataqa.marc.utils.pica.PicaDataField;
import de.gwdg.metadataqa.marc.utils.pica.PicaFieldDefinition;
import de.gwdg.metadataqa.marc.utils.pica.reader.model.PicaLine;
import de.gwdg.metadataqa.marc.utils.pica.PicaSchemaManager;
import de.gwdg.metadataqa.marc.utils.pica.PicaSubfield;
import de.gwdg.metadataqa.marc.utils.pica.reader.model.PicaLine;
import net.minidev.json.JSONArray;
import org.marc4j.marc.ControlField;
import org.marc4j.marc.Record;
Expand All @@ -46,8 +45,8 @@
import org.marc4j.marc.impl.ControlFieldImpl;
import org.marc4j.marc.impl.DataFieldImpl;
import org.marc4j.marc.impl.LeaderImpl;
import org.marc4j.marc.impl.SubfieldImpl;
import org.marc4j.marc.impl.RecordImpl;
import org.marc4j.marc.impl.SubfieldImpl;

import java.security.InvalidParameterException;
import java.util.ArrayList;
Expand Down Expand Up @@ -82,7 +81,7 @@ public static BibliographicRecord create(JsonSelector selector, MarcVersion vers
continue;
switch (dataElement.getLabel()) {
case "leader":
marcRecord.setLeader(new Leader(extractFirst(selector, dataElement)));
marcRecord.setLeader(new Marc21Leader(extractFirst(selector, dataElement)));
break;
case "001":
marcRecord.setControl001(new Control001(extractFirst(selector, dataElement)));
Expand Down Expand Up @@ -128,7 +127,7 @@ public static BibliographicRecord createFromMarc4j(Record marc4jRecord) {
}

public static BibliographicRecord createFromMarc4j(Record marc4jRecord,
Leader.Type defaultType) {
MarcLeader.Type defaultType) {
return createFromMarc4j(marc4jRecord, defaultType, MarcVersion.MARC21);
}

Expand All @@ -138,7 +137,7 @@ public static BibliographicRecord createFromMarc4j(Record marc4jRecord,
}

public static BibliographicRecord createFromMarc4j(Record marc4jRecord,
Leader.Type defaultType,
MarcLeader.Type defaultType,
MarcVersion marcVersion) {
return createFromMarc4j(marc4jRecord, defaultType, marcVersion, null);
}
Expand All @@ -152,7 +151,7 @@ public static BibliographicRecord createFromMarc4j(Record marc4jRecord,
* @return
*/
public static BibliographicRecord createFromMarc4j(Record marc4jRecord,
Leader.Type defaultType,
MarcLeader.Type defaultType,
MarcVersion marcVersion,
String replacementInControlFields) {
var marcRecord = new Marc21BibliographicRecord();
Expand All @@ -161,7 +160,7 @@ public static BibliographicRecord createFromMarc4j(Record marc4jRecord,
String data = marc4jRecord.getLeader().marshal();
if (replacementInControlFields != null)
data = data.replace(replacementInControlFields, " ");
marcRecord.setLeader(new Leader(data, defaultType));
marcRecord.setLeader(new Marc21Leader(data, defaultType));

if (marcRecord.getType() == null) {
throw new InvalidParameterException(
Expand Down
26 changes: 13 additions & 13 deletions src/main/java/de/gwdg/metadataqa/marc/MarcFieldExtractor.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,19 @@
import de.gwdg.metadataqa.api.counter.FieldCounter;
import de.gwdg.metadataqa.api.interfaces.Calculator;
import de.gwdg.metadataqa.api.interfaces.MetricResult;
import de.gwdg.metadataqa.api.model.selector.JsonSelector;
import de.gwdg.metadataqa.api.model.XmlFieldInstance;
import de.gwdg.metadataqa.api.model.selector.JsonSelector;
import de.gwdg.metadataqa.api.model.selector.Selector;
import de.gwdg.metadataqa.api.schema.Schema;
import de.gwdg.metadataqa.api.util.CompressionLevel;
import de.gwdg.metadataqa.marc.dao.Control007;
import de.gwdg.metadataqa.marc.dao.Control008;
import de.gwdg.metadataqa.marc.dao.Marc21Leader;
import de.gwdg.metadataqa.marc.definition.general.codelist.CodeList;
import de.gwdg.metadataqa.marc.definition.general.codelist.LanguageCodes;
import de.gwdg.metadataqa.marc.definition.general.codelist.OrganizationCodes;
import org.apache.commons.lang3.StringUtils;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
Expand All @@ -19,14 +27,6 @@
import java.util.StringTokenizer;
import java.util.logging.Logger;

import de.gwdg.metadataqa.marc.dao.Control007;
import de.gwdg.metadataqa.marc.dao.Control008;
import de.gwdg.metadataqa.marc.dao.Leader;
import de.gwdg.metadataqa.marc.definition.general.codelist.CodeList;
import de.gwdg.metadataqa.marc.definition.general.codelist.LanguageCodes;
import de.gwdg.metadataqa.marc.definition.general.codelist.OrganizationCodes;
import org.apache.commons.lang3.StringUtils;

/**
*
* @author Péter Király <peter.kiraly at gwdg.de>
Expand All @@ -44,7 +44,7 @@ public class MarcFieldExtractor implements Calculator, Serializable {
protected FieldCounter<List<String>> resultMap;
protected Schema schema;
private String recordId;
private Leader leader;
private Marc21Leader leader;
private Control007 x007;
private Control008 x008;
private Map<String, Object> duplumKeyMap;
Expand Down Expand Up @@ -123,7 +123,7 @@ public void measure(JsonSelector selector)
values.add(instance.getValue());
}
if (fieldName.equals(LEADER_KEY)) {
leader = new Leader(values.get(0));
leader = new Marc21Leader(values.get(0));
}
}
resultMap.put(fieldName, values);
Expand Down Expand Up @@ -197,7 +197,7 @@ public List<String> getHeader() {

public void processLeader() {
if (resultMap.has(LEADER_KEY))
leader = new Leader(resultMap.get(LEADER_KEY).get(0));
leader = new Marc21Leader(resultMap.get(LEADER_KEY).get(0));
else
logger.severe("No leader in result map. Nr of existing vars: " + StringUtils.join(resultMap.getMap().keySet(), ", "));
}
Expand Down Expand Up @@ -251,7 +251,7 @@ public String getRecordId() {
return recordId;
}

public Leader getLeader() {
public Marc21Leader getLeader() {
return leader;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package de.gwdg.metadataqa.marc.analysis.validator;

import de.gwdg.metadataqa.marc.dao.Leader;
import de.gwdg.metadataqa.marc.dao.MarcLeader;
import de.gwdg.metadataqa.marc.definition.ControlValue;

import java.util.ArrayList;
Expand All @@ -15,7 +15,7 @@ public LeaderValidator(ValidatorConfiguration configuration) {
super(configuration);
}

public boolean validate(Leader leader) {
public boolean validate(MarcLeader leader) {
var isValid = true;
ControlValueValidator controlValueValidator = new ControlValueValidator(configuration);
validationErrors = new ArrayList<>();
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/de/gwdg/metadataqa/marc/cli/Completeness.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

import de.gwdg.metadataqa.marc.CsvUtils;
import de.gwdg.metadataqa.marc.Utils;
import de.gwdg.metadataqa.marc.analysis.completeness.CompletenessDAO;
import de.gwdg.metadataqa.marc.analysis.GroupSelector;
import de.gwdg.metadataqa.marc.analysis.completeness.CompletenessDAO;
import de.gwdg.metadataqa.marc.analysis.completeness.RecordCompleteness;
import de.gwdg.metadataqa.marc.cli.parameters.CommonParameters;
import de.gwdg.metadataqa.marc.cli.parameters.CompletenessParameters;
Expand Down Expand Up @@ -397,7 +397,7 @@ private String formatCardinality(String marcPath,
tagLabel = tagHierarchy.getTagLabel();
subfieldLabel = tagHierarchy.getSubfieldLabel();
} else {
logger.severe("Key can not be found in the TagHierarchy: " + marcPathLabel);
logger.severe(() -> "Key can not be found in the TagHierarchy: " + marcPathLabel);
}

// Integer cardinality = entry.getValue();
Expand Down
10 changes: 5 additions & 5 deletions src/main/java/de/gwdg/metadataqa/marc/cli/SerialScore.java
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
package de.gwdg.metadataqa.marc.cli;

import de.gwdg.metadataqa.marc.dao.Leader;
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;
import de.gwdg.metadataqa.marc.analysis.Serial;
import de.gwdg.metadataqa.marc.analysis.SerialFields;
import de.gwdg.metadataqa.marc.cli.parameters.CommonParameters;
import de.gwdg.metadataqa.marc.cli.parameters.SerialScoreParameters;
import de.gwdg.metadataqa.marc.cli.processor.BibliographicInputProcessor;
import de.gwdg.metadataqa.marc.analysis.Serial;
import de.gwdg.metadataqa.marc.cli.utils.RecordIterator;
import de.gwdg.metadataqa.marc.dao.MarcLeader;
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;
import de.gwdg.metadataqa.marc.dao.record.Marc21BibliographicRecord;
import de.gwdg.metadataqa.marc.model.validation.ValidationError;
import org.apache.commons.cli.HelpFormatter;
Expand All @@ -30,8 +30,8 @@
import java.util.logging.Level;
import java.util.logging.Logger;

import static de.gwdg.metadataqa.marc.Utils.quote;
import static de.gwdg.metadataqa.marc.Utils.createRow;
import static de.gwdg.metadataqa.marc.Utils.quote;

/**
* usage:
Expand Down Expand Up @@ -112,7 +112,7 @@ public void processRecord(BibliographicRecord marcRecord, int recordNumber, List
@Override
public void processRecord(BibliographicRecord marcRecord, int recordNumber) {
if (marcRecord instanceof Marc21BibliographicRecord
&& ((Marc21BibliographicRecord) marcRecord).getType().equals(Leader.Type.CONTINUING_RESOURCES)) {
&& ((Marc21BibliographicRecord) marcRecord).getType().equals(MarcLeader.Type.CONTINUING_RESOURCES)) {
if (parameters.getRecordIgnorator().isIgnorable(marcRecord))
return;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
package de.gwdg.metadataqa.marc.cli.parameters;

import com.fasterxml.jackson.annotation.JsonIgnore;
import de.gwdg.metadataqa.marc.cli.utils.ignorablerecords.RecordIgnorator;
import de.gwdg.metadataqa.marc.cli.utils.ignorablerecords.RecordIgnoratorFactory;
import de.gwdg.metadataqa.marc.cli.utils.IgnorableFields;
import de.gwdg.metadataqa.marc.cli.utils.ignorablerecords.RecordFilter;
import de.gwdg.metadataqa.marc.cli.utils.ignorablerecords.RecordFilterFactory;
import de.gwdg.metadataqa.marc.dao.Leader;
import de.gwdg.metadataqa.marc.cli.utils.IgnorableFields;
import de.gwdg.metadataqa.marc.cli.utils.ignorablerecords.RecordIgnorator;
import de.gwdg.metadataqa.marc.cli.utils.ignorablerecords.RecordIgnoratorFactory;
import de.gwdg.metadataqa.marc.dao.MarcLeader;
import de.gwdg.metadataqa.marc.definition.DataSource;
import de.gwdg.metadataqa.marc.definition.MarcFormat;
import de.gwdg.metadataqa.marc.definition.MarcVersion;
Expand Down Expand Up @@ -38,7 +38,7 @@ public class CommonParameters implements Serializable {
protected int limit = -1;
protected int offset = -1;
protected String id = null;
protected Leader.Type defaultRecordType = Leader.Type.BOOKS;
protected MarcLeader.Type defaultRecordType = MarcLeader.Type.BOOKS;
protected boolean fixAlephseq = false;
protected boolean fixAlma = false;
protected boolean fixKbr = false;
Expand Down Expand Up @@ -372,16 +372,16 @@ public void setId(String id) {
this.id = id;
}

public Leader.Type getDefaultRecordType() {
public MarcLeader.Type getDefaultRecordType() {
return defaultRecordType;
}

public void setDefaultRecordType(Leader.Type defaultRecordType) {
public void setDefaultRecordType(MarcLeader.Type defaultRecordType) {
this.defaultRecordType = defaultRecordType;
}

public void setDefaultRecordType(String defaultRecordType) throws ParseException {
this.defaultRecordType = Leader.Type.valueOf(defaultRecordType);
this.defaultRecordType = MarcLeader.Type.valueOf(defaultRecordType);
if (this.defaultRecordType == null)
throw new ParseException(String.format("Unrecognized defaultRecordType parameter value: '%s'", defaultRecordType));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import de.gwdg.metadataqa.marc.Utils;
import de.gwdg.metadataqa.marc.cli.parameters.CompletenessParameters;
import de.gwdg.metadataqa.marc.dao.DataField;
import de.gwdg.metadataqa.marc.dao.Leader;
import de.gwdg.metadataqa.marc.dao.MarcLeader;
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;
import de.gwdg.metadataqa.marc.dao.record.Marc21Record;
import de.gwdg.metadataqa.marc.utils.TagHierarchy;
Expand All @@ -23,7 +23,7 @@ public Marc21CompletenessPlugin(CompletenessParameters parameters) {
public String getDocumentType(BibliographicRecord marcRecord) {
return marcRecord != null && marcRecord instanceof Marc21Record
? ((Marc21Record) marcRecord).getType().getValue()
: Leader.Type.BOOKS.getValue();
: MarcLeader.Type.BOOKS.getValue();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
package de.gwdg.metadataqa.marc.cli.utils;

import de.gwdg.metadataqa.marc.MarcFactory;
import de.gwdg.metadataqa.marc.Utils;
import de.gwdg.metadataqa.marc.cli.parameters.CommonParameters;
import de.gwdg.metadataqa.marc.dao.Leader;
import de.gwdg.metadataqa.marc.MarcFactory;
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;
import de.gwdg.metadataqa.marc.cli.processor.BibliographicInputProcessor;
import de.gwdg.metadataqa.marc.dao.MarcLeader;
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;
import de.gwdg.metadataqa.marc.definition.DataSource;
import de.gwdg.metadataqa.marc.definition.MarcVersion;
import de.gwdg.metadataqa.marc.definition.bibliographic.SchemaType;
Expand Down Expand Up @@ -43,7 +43,7 @@ public class RecordIterator {
private CommonParameters parameters;
private String replacementInControlFields;
private MarcVersion marcVersion;
private Leader.Type defaultRecordType;
private MarcLeader.Type defaultRecordType;
private DecimalFormat decimalFormat;
private PicaSchemaManager picaSchema;
private String status = "waits";
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/de/gwdg/metadataqa/marc/dao/Control006.java
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ public class Control006 extends MarcPositionalControlField implements Serializab

private final Map<Integer, ControlfieldPositionDefinition> byPosition = new LinkedHashMap<>();

public Control006(String content, Leader.Type recordType) {
public Control006(String content, MarcLeader.Type recordType) {
super(Control006Definition.getInstance(), content, recordType);
if (content != null)
processContent();
Expand Down Expand Up @@ -287,7 +287,7 @@ public Set<Integer> getSubfieldPositions() {
return byPosition.keySet();
}

public Leader.Type getRecordType() {
public MarcLeader.Type getRecordType() {
return recordType;
}

Expand Down
4 changes: 2 additions & 2 deletions src/main/java/de/gwdg/metadataqa/marc/dao/Control008.java
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ public class Control008 extends MarcPositionalControlField implements Serializab
private final Map<Integer, ControlfieldPositionDefinition> byPosition = new LinkedHashMap<>();
private Control008Type actual008Type;

public Control008(String content, Leader.Type recordType) {
public Control008(String content, MarcLeader.Type recordType) {
super(Control008Definition.getInstance(), content, recordType);
initialize();
}
Expand Down Expand Up @@ -301,7 +301,7 @@ public Map<ControlfieldPositionDefinition, String> getValueMap() {
return valuesMap;
}

public Leader.Type getRecordType() {
public MarcLeader.Type getRecordType() {
return recordType;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public DefaultMarcPositionalControlField(DefaultControlFieldDefinition definitio
processContent();
}

public DefaultMarcPositionalControlField(ControlFieldDefinition definition, String content, Leader.Type recordType) {
public DefaultMarcPositionalControlField(ControlFieldDefinition definition, String content, MarcLeader.Type recordType) {
super(definition, content, recordType);
}

Expand Down
Loading

0 comments on commit c1d7679

Please sign in to comment.