Skip to content

Commit

Permalink
eclipse-rdf4jGH-5058: additional parser code (WIP)
Browse files Browse the repository at this point in the history
  • Loading branch information
barthanssens committed Jul 14, 2024
1 parent ae0266a commit f930705
Show file tree
Hide file tree
Showing 5 changed files with 155 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,17 @@ public class CSVW {
public static final Namespace NS = Vocabularies.createNamespace(PREFIX, NAMESPACE);

// Classes (the constants below are assigned in the static initializer of this class)
/** IRI of the {@code csvw:Row} class. */
public static final IRI ROW;

/** IRI of the {@code csvw:Schema} class. */
public static final IRI SCHEMA;

/** IRI of the {@code csvw:Table} class. */
public static final IRI TABLE;

/** IRI of the {@code csvw:TableGroup} class. */
public static final IRI TABLE_GROUP;

// Properties
/** csvw:aboutUrl */
Expand All @@ -60,6 +71,9 @@ public class CSVW {
/** IRI of the {@code csvw:delimiter} property. */
public static final IRI DELIMITER;

/** IRI of the {@code csvw:describes} property. */
public static final IRI DESCRIBES;

/** IRI of the {@code csvw:dialect} property. */
public static final IRI DIALECT;

Expand Down Expand Up @@ -96,6 +110,12 @@ public class CSVW {
/** IRI of the {@code csvw:required} property. */
public static final IRI REQUIRED;

/** IRI of the {@code csvw:rownum} property. */
public static final IRI ROWNUM;

/** IRI of the {@code csvw:skipColumns} property. */
public static final IRI SKIP_COLUMNS;

/** IRI of the {@code csvw:skipRows} property. */
public static final IRI SKIP_ROWS;

Expand All @@ -121,13 +141,19 @@ public class CSVW {
public static final IRI VIRTUAL;

static {
ROW = Vocabularies.createIRI(NAMESPACE, "Row");
SCHEMA = Vocabularies.createIRI(NAMESPACE, "Schema");
TABLE = Vocabularies.createIRI(NAMESPACE, "Table");
TABLE_GROUP = Vocabularies.createIRI(NAMESPACE, "TableGroup");

ABOUT_URL = Vocabularies.createIRI(NAMESPACE, "aboutUrl");
BASE = Vocabularies.createIRI(NAMESPACE, "base");
COLUMN = Vocabularies.createIRI(NAMESPACE, "column");
DATATYPE = Vocabularies.createIRI(NAMESPACE, "datatype");
DECIMAL_CHAR = Vocabularies.createIRI(NAMESPACE, "decimalChar");
DEFAULT = Vocabularies.createIRI(NAMESPACE, "default");
DELIMITER = Vocabularies.createIRI(NAMESPACE, "delimiter");
DESCRIBES = Vocabularies.createIRI(NAMESPACE, "describes");
DIALECT = Vocabularies.createIRI(NAMESPACE, "dialect");
ENCODING = Vocabularies.createIRI(NAMESPACE, "encoding");
FORMAT = Vocabularies.createIRI(NAMESPACE, "format");
Expand All @@ -140,6 +166,8 @@ public class CSVW {
PROPERTY_URL = Vocabularies.createIRI(NAMESPACE, "propertyUrl");
QUOTE_CHAR = Vocabularies.createIRI(NAMESPACE, "quoteChar");
REQUIRED = Vocabularies.createIRI(NAMESPACE, "required");
ROWNUM = Vocabularies.createIRI(NAMESPACE, "rownum");
SKIP_COLUMNS = Vocabularies.createIRI(NAMESPACE, "skipColumns");
SKIP_ROWS = Vocabularies.createIRI(NAMESPACE, "skipRows");
TABLE_SCHEMA = Vocabularies.createIRI(NAMESPACE, "tableSchema");
TABLES = Vocabularies.createIRI(NAMESPACE, "tables");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ public class CSVWMetadataFinder {

/**
 * Open URI as input stream
 *
 * @param uri URI to open
 * @return input stream, possibly {@code null}
 */
private static InputStream openURI(URI uri) {
try (InputStream is = uri.toURL().openStream()) {
Expand All @@ -47,6 +47,7 @@ private static InputStream openURI(URI uri) {
return null;
}
}

/**
* Find by adding metadata.json as file extension
*
Expand All @@ -61,7 +62,7 @@ public static InputStream findByExtension(URI csvFile) {
URI metaURI = URI.create(s + METADATA_EXT);
return openURI(metaURI);
}

/**
* Find by trying to get the csv-metadata.json in the path
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
*******************************************************************************/
package org.eclipse.rdf4j.rio.csvw.parsers;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Namespace;
Expand All @@ -22,20 +26,24 @@
* @author Bart Hanssens
*/
public abstract class CellParser {
// Matches brace-delimited placeholders such as {name}, {_name} or {#name}; group 1 is the whole placeholder,
// braces included
private static final Pattern PLACEHOLDERS = Pattern.compile("(\\{#?_?[^\\}]+\\})");

private String name;
private IRI dataType;
private String lang;
private String defaultValue;
private String nullValue;
private boolean required;
private IRI propertyIRI;
private String valueUrl;
private String format;
// decimal separator, a dot unless configured otherwise
private String decimalChar = ".";
private String groupChar;
private String separator;
// values are trimmed unless configured otherwise
private boolean trim = true;
// declared exactly once: a second declaration of this field would not compile
private boolean virtual = false;
// NOTE(review): presumably the placeholders of the property URL; not assigned in the visible code - confirm
private String[] propPlaceholder;
// placeholders extracted from the value URL, refreshed by setValueUrl()
private String[] valPlaceholder;

public String getName() {
return name;
Expand All @@ -61,18 +69,38 @@ public void setLang(String lang) {
this.lang = lang;
}

/**
 * Returns the default value configured for this cell parser.
 *
 * @return the default value, or {@code null} when none was set
 */
public String getDefaultValue() {
	return this.defaultValue;
}

/**
 * Set the default value of this cell parser.
 *
 * @param defaultValue default value, stored as given
 */
public void setDefaultValue(String defaultValue) {
this.defaultValue = defaultValue;
}

/**
 * Returns the string configured as the NULL value for this cell parser.
 *
 * @return the NULL value, or {@code null} when none was set
 */
public String getNullValue() {
	return this.nullValue;
}

/**
 * Set the string that represents the NULL value for this cell parser.
 *
 * @param nullValue NULL value, stored as given
 */
public void setNullValue(String nullValue) {
this.nullValue = nullValue;
}
Expand All @@ -85,18 +113,47 @@ public void setRequired(boolean isRequired) {
this.required = isRequired;
}

/**
 * Tells whether this cell parser is flagged as virtual.
 *
 * @return {@code true} when the virtual flag is set, {@code false} otherwise (the initial state)
 */
public boolean isVirtual() {
	return this.virtual;
}

public void setVirtual(boolean isVirtual) {
this.virtual = isVirtual;
/**
* Set virtual table
*
* @param virtual
*/
public void setVirtual(boolean virtual) {
this.virtual = virtual;
}

/**
 * Returns the property (predicate) IRI of this cell parser.
 *
 * @return the property IRI, or {@code null} when none was set
 */
public IRI getPropertyIRI() {
	return this.propertyIRI;
}

/**
 * Extract all placeholders (if any) from a URI template.
 *
 * @param template URI template string, may be {@code null}
 * @return the placeholders (surrounding braces included) in order of appearance, or {@code null} when the template
 *         is {@code null} or contains no placeholder
 */
private String[] extractPlaceholders(String template) {
	if (template == null) {
		return null;
	}
	List<String> found = new ArrayList<>();
	Matcher matcher = PLACEHOLDERS.matcher(template);
	// groupCount() is the number of capturing groups in the pattern (always 1 here), not the number of matches,
	// so every occurrence has to be collected with repeated find() calls
	while (matcher.find()) {
		found.add(matcher.group(1));
	}
	return found.isEmpty() ? null : found.toArray(new String[0]);
}

/**
* Set property URL (predicate IRI)
*
Expand All @@ -116,50 +173,111 @@ public void setPropertyIRI(String propertyUrl) {
this.propertyIRI = Values.iri("", propertyUrl);
}

/**
 * Returns the value URL of this cell parser.
 *
 * @return the value URL as it was set, or {@code null} when none was set
 */
public String getValueUrl() {
	return this.valueUrl;
}

/**
 * Set the value URL and refresh the placeholders extracted from it.
 *
 * @param valueUrl value URL, may be {@code null} to clear it
 */
public void setValueUrl(String valueUrl) {
	this.valueUrl = valueUrl;
	// a null value URL has no placeholders; skipping the extraction avoids a NullPointerException in the matcher
	this.valPlaceholder = (valueUrl == null) ? null : extractPlaceholders(valueUrl);
}

/**
 * Returns the format string of this cell parser.
 *
 * @return the format, or {@code null} when none was set
 */
public String getFormat() {
	return this.format;
}

/**
 * Set the format string of this cell parser.
 *
 * @param format format, stored as given
 */
public void setFormat(String format) {
this.format = format;
}

/**
 * Returns the decimal character of this cell parser.
 *
 * @return the decimal character, {@code "."} unless another one was set
 */
public String getDecimalChar() {
	return this.decimalChar;
}

/**
 * Set the decimal character of this cell parser.
 *
 * @param decimalChar decimal character, stored as given
 */
public void setDecimalChar(String decimalChar) {
this.decimalChar = decimalChar;
}

/**
 * Returns the group character of this cell parser.
 *
 * @return the group character, or {@code null} when none was set
 */
public String getGroupChar() {
	return this.groupChar;
}

/**
 * Set the group character of this cell parser.
 *
 * @param groupChar group character, stored as given
 */
public void setGroupChar(String groupChar) {
this.groupChar = groupChar;
}

/**
 * Returns the separator of this cell parser.
 *
 * @return the separator, or {@code null} when none was set
 */
public String getSeparator() {
	return this.separator;
}

/**
 * Set the separator of this cell parser.
 *
 * @param separator separator, stored as given
 */
public void setSeparator(String separator) {
this.separator = separator;
}

/**
 * Tells whether trimming is enabled for this cell parser.
 *
 * @return {@code true} when trimming is enabled (the initial state), {@code false} otherwise
 */
public boolean isTrim() {
	return this.trim;
}

/**
 * Set whether the value needs to be trimmed.
 *
 * @param trim {@code true} to enable trimming
 */
public void setTrim(boolean trim) {
this.trim = trim;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ public class CellParserDate extends CellParser {
/**
 * Set the date format and prepare the matching formatter.
 *
 * @param format date pattern as accepted by {@link DateTimeFormatter#ofPattern(String)}, or {@code null} to clear
 *               the formatter
 * @throws IllegalArgumentException if the pattern is invalid
 */
@Override
public void setFormat(String format) {
	super.setFormat(format);
	// no debug output on stderr in library code; a null format simply clears the formatter
	formatter = (format == null) ? null : DateTimeFormatter.ofPattern(format);
}

Expand All @@ -39,7 +38,6 @@ public Value parse(String cell) {
if (formatter != null) {
s = DateTimeFormatter.ISO_DATE.format(formatter.parse(s));
}
System.err.println("date = " + s);
return Values.literal(s, getDataType());
}

Expand Down
4 changes: 2 additions & 2 deletions core/rio/csvw/src/test/resources/painters.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"wikidata_id","first_name","last_name","country_id","country_name_nl","country_name_en","date_of_birth","married","languages"
"Q5582","Vincent","van Gogh","Q29999","Nederland","The Netherlands","30/3/1853","No","dutch french"
"Q5582"," Vincent","van Gogh","Q29999","Nederland","The Netherlands","30/3/1853","No","dutch french"
"Q164712","Paul","Delvaux","Q31","België","Belgium","23/9/1897","Yes","french"
"Q46408","Georgia","O'Keeffe","Q30","Verenigde Staten","United States","15/11/1887","Yes","english"
"Q46408","Georgia ","O'Keeffe","Q30","Verenigde Staten","United States","15/11/1887","Yes","english"

0 comments on commit f930705

Please sign in to comment.