Skip to content

Commit

Permalink
eclipse-rdf4jGH-5058: initial code (WIP)
Browse files Browse the repository at this point in the history
  • Loading branch information
barthanssens committed Jul 7, 2024
1 parent 51f09c3 commit 8c3942d
Show file tree
Hide file tree
Showing 16 changed files with 2,273 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*******************************************************************************
* Copyright (c) 2024 Eclipse RDF4J contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
******************************************************************************/

package org.eclipse.rdf4j.model.vocabulary;

import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Namespace;

/**
* Constants for CSV on the Web
*
* @author Bart Hanssens
* @see <a href="https://csvw.org/">CSV on the Web</a>
*/
public class CSVW {
	/**
	 * The CSVW namespace: http://www.w3.org/ns/csvw#
	 */
	public static final String NAMESPACE = "http://www.w3.org/ns/csvw#";

	/**
	 * Recommended prefix for the namespace: "csvw"
	 */
	public static final String PREFIX = "csvw";

	/**
	 * An immutable {@link Namespace} constant that represents the namespace.
	 */
	public static final Namespace NS = Vocabularies.createNamespace(PREFIX, NAMESPACE);

	// Classes
	// (none defined yet)

	// Properties
	/** csvw:base */
	public static final IRI BASE;

	/** csvw:datatype */
	public static final IRI DATATYPE;

	/** csvw:default */
	public static final IRI DEFAULT;

	/** csvw:lang */
	public static final IRI LANG;

	/** csvw:propertyUrl */
	public static final IRI PROPERTY_URL;

	/** csvw:tableSchema */
	public static final IRI TABLE_SCHEMA;

	/** csvw:url */
	public static final IRI URL;

	/** csvw:valueUrl */
	public static final IRI VALUE_URL;

	// Every IRI is built from NAMESPACE plus the local name shown in the constant's Javadoc.
	static {
		BASE = Vocabularies.createIRI(NAMESPACE, "base");
		DATATYPE = Vocabularies.createIRI(NAMESPACE, "datatype");
		DEFAULT = Vocabularies.createIRI(NAMESPACE, "default");
		LANG = Vocabularies.createIRI(NAMESPACE, "lang");
		PROPERTY_URL = Vocabularies.createIRI(NAMESPACE, "propertyUrl");
		TABLE_SCHEMA = Vocabularies.createIRI(NAMESPACE, "tableSchema");
		URL = Vocabularies.createIRI(NAMESPACE, "url");
		VALUE_URL = Vocabularies.createIRI(NAMESPACE, "valueUrl");
	}
}
12 changes: 12 additions & 0 deletions core/rio/api/src/main/java/org/eclipse/rdf4j/rio/RDFFormat.java
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,18 @@ public class RDFFormat extends FileFormat {
List.of("application/vnd.hdt"), null, List.of("hdt"), null,
SUPPORTS_NAMESPACES, NO_CONTEXTS, NO_RDF_STAR);

/**
* The <a href="https://w3c.github.io/csvw/csv2rdf/">CSV on the Web</a> file format, an RDF serialization format.
* <p>
* The file extension <code>.csv</code> is recommended for CSV documents.
* </p>
* <p>
* The format is registered with the media type <code>application/csvm+json</code>, UTF-8 as its character set and
* <code>csv</code> as its only file extension.
* </p>
*
* @see <a href="https://w3c.github.io/csvw/csv2rdf/">CSVW</a>
*/
public static final RDFFormat CSVW = new RDFFormat("CSVW",
List.of("application/csvm+json"), StandardCharsets.UTF_8, List.of("csv"), null,
SUPPORTS_NAMESPACES, NO_CONTEXTS, NO_RDF_STAR);

/*----------------*
* Static methods *
*----------------*/
Expand Down
62 changes: 62 additions & 0 deletions core/rio/csvw/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.eclipse.rdf4j</groupId>
<artifactId>rdf4j-rio</artifactId>
<version>5.1.0-SNAPSHOT</version>
</parent>
<!-- NOTE(review): the module lives in core/rio/csvw and its package is org.eclipse.rdf4j.rio.csvw, but this artifactId reads "csw"; confirm the spelling is intended before release -->
<artifactId>rdf4j-rio-csw</artifactId>
<packaging>jar</packaging>
<name>RDF4J: Rio - CSVWeb</name>
<description>Experimental Rio parser implementation for CSV on the Web</description>
<dependencies>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>rdf4j-model</artifactId>
<version>${project.version}</version>
<exclusions>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>rdf4j-rio-jsonld-legacy</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>rdf4j-rio-api</artifactId>
<version>${project.version}</version>
<exclusions>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
<exclusion>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
</exclusion>
<exclusion>
<groupId>no.hasmac</groupId>
<artifactId>hasmac-json-ld</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.opencsv</groupId>
<artifactId>opencsv</artifactId>
</dependency>
</dependencies>
</project>
102 changes: 102 additions & 0 deletions core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/CSVWParser.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/*******************************************************************************
* Copyright (c) 2024 Eclipse RDF4J contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.rio.csvw;

import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.impl.LinkedHashModel;
import org.eclipse.rdf4j.model.util.Models;
import org.eclipse.rdf4j.model.util.RDFCollections;
import org.eclipse.rdf4j.model.vocabulary.CSVW;
import org.eclipse.rdf4j.rio.ParserConfig;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFHandlerException;
import org.eclipse.rdf4j.rio.RDFParseException;
import org.eclipse.rdf4j.rio.Rio;
import org.eclipse.rdf4j.rio.csvw.parsers.CellParserFactory;
import org.eclipse.rdf4j.rio.csvw.parsers.Parser;
import org.eclipse.rdf4j.rio.helpers.AbstractRDFParser;
import org.eclipse.rdf4j.rio.helpers.JSONLDSettings;

/**
 * Rio parser for CSV on the Web (CSVW). Work in progress.
 * <p>
 * The input handed to one of the {@code parse} methods is read as JSON-LD metadata. Every
 * {@code csvw:tableSchema} statement in that metadata is inspected and logged, but no CSV data is read and no
 * statements are reported to the RDF handler yet.
 *
 * @author Bart Hanssens
 *
 * @since 5.1.0
 */
public class CSVWParser extends AbstractRDFParser {

	private static final Logger LOGGER = Logger.getLogger(CSVWParser.class.getName());

	@Override
	public RDFFormat getRDFFormat() {
		return RDFFormat.CSVW;
	}

	/**
	 * Parses CSVW metadata from a byte stream.
	 *
	 * @param in      stream containing the JSON-LD metadata, must not be {@code null}
	 * @param baseURI base URI handed to the JSON-LD parser, may be {@code null}
	 * @throws IOException              if the stream cannot be read
	 * @throws IllegalArgumentException if {@code in} is {@code null}
	 */
	@Override
	public synchronized void parse(InputStream in, String baseURI)
			throws IOException, RDFParseException, RDFHandlerException {
		if (in == null) {
			throw new IllegalArgumentException("Input stream must not be 'null'");
		}
		try {
			processMetadata(parseMetadata(in, null, baseURI));
		} finally {
			// reset the parser state even when parsing fails
			clear();
		}
	}

	/**
	 * Parses CSVW metadata from a character stream.
	 *
	 * @param reader  reader providing the JSON-LD metadata, must not be {@code null}
	 * @param baseURI base URI handed to the JSON-LD parser, may be {@code null}
	 * @throws IOException              if the reader cannot be read
	 * @throws IllegalArgumentException if {@code reader} is {@code null}
	 */
	@Override
	public synchronized void parse(Reader reader, String baseURI)
			throws IOException, RDFParseException, RDFHandlerException {
		if (reader == null) {
			throw new IllegalArgumentException("Reader must not be 'null'");
		}
		try {
			processMetadata(parseMetadata(null, reader, baseURI));
		} finally {
			// reset the parser state even when parsing fails
			clear();
		}
	}

	/**
	 * Walks over all {@code csvw:tableSchema} statements of the metadata and logs the statements describing each
	 * schema.
	 *
	 * @param metadata parsed JSON-LD metadata
	 */
	private void processMetadata(Model metadata) {
		LOGGER.log(Level.FINE, "CSVW metadata: {0}", metadata);

		for (Statement schemaStatement : metadata.getStatements(null, CSVW.TABLE_SCHEMA, null)) {
			Value schema = schemaStatement.getObject();
			if (!(schema instanceof Resource)) {
				// only a resource can be the subject of further schema statements
				LOGGER.log(Level.WARNING, "Ignoring csvw:tableSchema value that is not a resource: {0}", schema);
				continue;
			}
			for (Statement detail : metadata.getStatements((Resource) schema, null, null)) {
				LOGGER.log(Level.FINE, "Table schema statement: {0}", detail);
			}
			// TODO: derive the column definitions from the table schema and parse the CSV cells
		}
	}

	/**
	 * Parse JSON-LD metadata from whichever of the two sources is provided. The stream takes precedence when both are
	 * given.
	 *
	 * @param in      byte stream with the metadata, or {@code null} when a reader is used
	 * @param reader  character stream with the metadata, or {@code null} when a stream is used
	 * @param baseURI base URI handed to the JSON-LD parser, may be {@code null}
	 * @return the parsed metadata, never {@code null}
	 * @throws IOException              if the source cannot be read
	 * @throws IllegalArgumentException if both {@code in} and {@code reader} are {@code null}
	 */
	private Model parseMetadata(InputStream in, Reader reader, String baseURI) throws IOException {
		ParserConfig cfg = new ParserConfig();

		if (in != null) {
			return Rio.parse(in, baseURI, RDFFormat.JSONLD, cfg);
		}
		if (reader != null) {
			return Rio.parse(reader, baseURI, RDFFormat.JSONLD, cfg);
		}
		throw new IllegalArgumentException("Either an input stream or a reader must be provided");
	}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*******************************************************************************
* Copyright (c) 2024 Eclipse RDF4J contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.rio.csvw;

import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFParserFactory;

/**
* An {@link RDFParserFactory} for CSV on the Web parsers.
*
* @author Bart Hanssens
*
* @since 5.1.0
*/
public class CSVWParserFactory implements RDFParserFactory {
	/**
	 * Returns {@link RDFFormat#CSVW}.
	 */
	@Override
	public RDFFormat getRDFFormat() {
		return RDFFormat.CSVW;
	}

	/**
	 * Returns a new instance of {@link CSVWParser}.
	 */
	@Override
	public CSVWParser getParser() {
		return new CSVWParser();
	}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*******************************************************************************
* Copyright (c) 2024 Eclipse RDF4J contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.rio.csvw;

import org.eclipse.rdf4j.rio.RioSetting;
import org.eclipse.rdf4j.rio.helpers.BooleanRioSetting;

/**
* ParserSettings for the CSV on the Web parser features.
* <p>
* Several of these settings can be overridden by means of a system property, but only if specified at JVM startup time.
*
* @author Bart Hanssens
*
* @since 5.1.0
*/
public class CSVWParserSettings {

	/**
	 * Boolean setting for parser to determine whether syntactically invalid lines in CSVW generate a parse error.
	 * <p>
	 * Defaults to true.
	 * <p>
	 * Can be overridden by setting system property {@code org.eclipse.rdf4j.rio.csvw.fail_on_invalid_lines}
	 */
	public static final BooleanRioSetting FAIL_ON_INVALID_LINES = new BooleanRioSetting(
			"org.eclipse.rdf4j.rio.csvw.fail_on_invalid_lines", "Fail on CSVW invalid lines", Boolean.TRUE);

	/**
	 * Private constructor: this class only holds setting constants and is not meant to be instantiated.
	 */
	private CSVWParserSettings() {
	}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<html>
<head></head>
<body>
Parser for CSV on the Web. The parser adheres to the
<a href="https://w3c.github.io/csvw/syntax/">editor's draft of 02 November 2022</a>.
</body>
</html>
Loading

0 comments on commit 8c3942d

Please sign in to comment.