Skip to content

Commit

Permalink
eclipse-rdf4jGH-5058: additional parser code (WIP)
Browse files Browse the repository at this point in the history
  • Loading branch information
barthanssens committed Jul 11, 2024
1 parent f335478 commit ce0c8e5
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ private CellParser getCellParser(Model metadata, Resource column) {
Models.getPropertyString(metadata, column, CSVW.GROUP_CHAR).ifPresent(v -> parser.setGroupChar(v));

// mostly for date formats
Models.getPropertyString(metadata, column, CSVW.FORMAT).ifPresent(v -> parser.setFormat(v));
getFormat(metadata, column).ifPresent(v -> parser.setFormat(v));

Models.getPropertyString(metadata, column, CSVW.VALUE_URL).ifPresent(v -> parser.setValueURL(v));

Expand Down Expand Up @@ -272,6 +272,24 @@ private IRI getDatatypeIRI(Model metadata, Resource column) {
return XSD.valueOf(datatype.stringValue().toUpperCase()).getIri();
}

/**
 * Look up the format string attached to the datatype node of a column.
 *
 * The format is only returned when the column's datatype value is a blank node and that node carries a literal
 * format value; in every other case the result is empty.
 *
 * @param metadata model to read the properties from
 * @param column   column resource whose datatype is inspected
 * @return the format as a string, or an empty optional when no such format is found
 */
private Optional<String> getFormat(Model metadata, Resource column) {
	return Models.getProperty(metadata, column, CSVW.DATATYPE)
			// only a blank-node datatype is followed to look for a format
			.filter(Value::isBNode)
			.flatMap(datatype -> Models.getProperty(metadata, (Resource) datatype, CSVW.FORMAT))
			// anything other than a literal format is ignored
			.filter(Value::isLiteral)
			.map(Value::stringValue);
}

/**
* Get "about" URL template, to be used to create the subject of the triples
*
Expand Down Expand Up @@ -359,18 +377,20 @@ private void parseCSV(Model metadata, RDFHandler handler, URI csvFile, CellParse
private CSVReader getCSVReader(Model metadata, Resource table, Reader reader) {
CSVParserBuilder parserBuilder = new CSVParserBuilder();
CSVReaderBuilder builder = new CSVReaderBuilder(reader);
builder.withSkipLines(1);

Optional<Value> dialect = Models.getProperty(metadata, table, CSVW.DIALECT);
if (dialect.isPresent()) {
Models.getPropertyString(metadata, (Resource) dialect.get(), CSVW.DELIMITER)
Optional<Value> val = Models.getProperty(metadata, table, CSVW.DIALECT);
if (val.isPresent()) {
Resource dialect = (Resource) val.get();
Models.getPropertyString(metadata, dialect, CSVW.DELIMITER)
.ifPresent(v -> parserBuilder.withSeparator(v.charAt(0)));
Models.getPropertyString(metadata, (Resource) dialect.get(), CSVW.HEADER)
Models.getPropertyString(metadata, dialect, CSVW.HEADER)
.ifPresent(v -> builder.withSkipLines(v.equalsIgnoreCase("false") ? 0 : 1));
Models.getPropertyString(metadata, (Resource) dialect.get(), CSVW.QUOTE_CHAR)
Models.getPropertyString(metadata, dialect, CSVW.QUOTE_CHAR)
.ifPresent(v -> parserBuilder.withQuoteChar(v.charAt(0)));
}

return new CSVReaderBuilder(reader).withCSVParser(parserBuilder.build()).build();
return builder.withCSVParser(parserBuilder.build()).build();
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public class CellParserDate extends CellParser {
@Override
public void setFormat(String format) {
super.setFormat(format);
System.err.println(format);
System.err.println("format = " + format);
formatter = DateTimeFormatter.ofPattern(format);
}

Expand All @@ -39,6 +39,7 @@ public Value parse(String cell) {
if (formatter != null) {
s = DateTimeFormatter.ISO_DATE.format(formatter.parse(s));
}
System.err.println("date = " + s);
return Values.literal(s, dataType);
}

Expand Down
2 changes: 1 addition & 1 deletion core/rio/csvw/src/test/resources/painters.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"wikidata_id","first_name","last_name,country_id","country_name_nl","country_name_en","date_of_birth","married","languages"
"wikidata_id","first_name","last_name","country_id","country_name_nl","country_name_en","date_of_birth","married","languages"
"Q5582","Vincent","van Gogh","Q29999","Nederland","The Netherlands","30/3/1853","No","dutch french"
"Q164712","Paul","Delvaux","Q31","België","Belgium","23/9/1897","Yes","french"
"Q46408","Georgia","O'Keeffe","Q30","Verenigde Staten","United States","15/11/1887","Yes","english"
Expand Down

0 comments on commit ce0c8e5

Please sign in to comment.