forked from eclipse-rdf4j/rdf4j
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
eclipse-rdf4jGH-5058: initial code (WIP)
- Loading branch information
1 parent
51f09c3
commit 8c3942d
Showing
16 changed files
with
2,273 additions
and
0 deletions.
There are no files selected for viewing
76 changes: 76 additions & 0 deletions
76
core/model-vocabulary/src/main/java/org/eclipse/rdf4j/model/vocabulary/CSVW.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
/******************************************************************************* | ||
* Copyright (c) 2024 Eclipse RDF4J contributors. | ||
* | ||
* All rights reserved. This program and the accompanying materials | ||
* are made available under the terms of the Eclipse Distribution License v1.0 | ||
* which accompanies this distribution, and is available at | ||
* http://www.eclipse.org/org/documents/edl-v10.php. | ||
* | ||
* SPDX-License-Identifier: BSD-3-Clause | ||
******************************************************************************/ | ||
|
||
package org.eclipse.rdf4j.model.vocabulary; | ||
|
||
import org.eclipse.rdf4j.model.IRI; | ||
import org.eclipse.rdf4j.model.Namespace; | ||
|
||
/** | ||
* Constants for CSV on the Web | ||
* | ||
* @author Bart Hanssens | ||
* @see <a href="https://csvw.org/">CSV on the Web</a> | ||
*/ | ||
public class CSVW { | ||
    /** Full namespace IRI of the CSVW vocabulary: http://www.w3.org/ns/csvw# */ | ||
    public static final String NAMESPACE = "http://www.w3.org/ns/csvw#"; | ||
|
||
    /** Prefix recommended for the namespace: "csvw" */ | ||
    public static final String PREFIX = "csvw"; | ||
|
||
    /** Immutable {@link Namespace} constant pairing {@link #PREFIX} with {@link #NAMESPACE}. */ | ||
    public static final Namespace NS = Vocabularies.createNamespace(PREFIX, NAMESPACE); | ||
|
||
    // Classes | ||
|
||
    // Properties (each constant is created directly from NAMESPACE plus its local name) | ||
|
||
    /** csvw:base */ | ||
    public static final IRI BASE = Vocabularies.createIRI(NAMESPACE, "base"); | ||
|
||
    /** csvw:datatype */ | ||
    public static final IRI DATATYPE = Vocabularies.createIRI(NAMESPACE, "datatype"); | ||
|
||
    /** csvw:default */ | ||
    public static final IRI DEFAULT = Vocabularies.createIRI(NAMESPACE, "default"); | ||
|
||
    /** csvw:lang */ | ||
    public static final IRI LANG = Vocabularies.createIRI(NAMESPACE, "lang"); | ||
|
||
    /** csvw:propertyUrl */ | ||
    public static final IRI PROPERTY_URL = Vocabularies.createIRI(NAMESPACE, "propertyUrl"); | ||
|
||
    /** csvw:tableSchema */ | ||
    public static final IRI TABLE_SCHEMA = Vocabularies.createIRI(NAMESPACE, "tableSchema"); | ||
|
||
    /** csvw:url */ | ||
    public static final IRI URL = Vocabularies.createIRI(NAMESPACE, "url"); | ||
|
||
    /** csvw:valueUrl */ | ||
    public static final IRI VALUE_URL = Vocabularies.createIRI(NAMESPACE, "valueUrl"); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
<modelVersion>4.0.0</modelVersion> | ||
<parent> | ||
<groupId>org.eclipse.rdf4j</groupId> | ||
<artifactId>rdf4j-rio</artifactId> | ||
<version>5.1.0-SNAPSHOT</version> | ||
</parent> | ||
<!-- NOTE(review): "csw" looks like a typo for "csvw" (the module lives in core/rio/csvw). Confirm before release: renaming changes the published coordinates and any pom that references this artifact. --> | ||
<artifactId>rdf4j-rio-csw</artifactId> | ||
<packaging>jar</packaging> | ||
<name>RDF4J: Rio - CSVWeb</name> | ||
<description>Experimental Rio parser implementation for CSV on the Web</description> | ||
<dependencies> | ||
<dependency> | ||
<groupId>${project.groupId}</groupId> | ||
<artifactId>rdf4j-model</artifactId> | ||
<version>${project.version}</version> | ||
<exclusions> | ||
<exclusion> | ||
<groupId>com.google.guava</groupId> | ||
<artifactId>guava</artifactId> | ||
</exclusion> | ||
</exclusions> | ||
</dependency> | ||
<!-- The CSVW metadata file is JSON-LD; CSVWParser reads it through Rio with RDFFormat.JSONLD. --> | ||
<dependency> | ||
<groupId>${project.groupId}</groupId> | ||
<artifactId>rdf4j-rio-jsonld-legacy</artifactId> | ||
<version>${project.version}</version> | ||
</dependency> | ||
<dependency> | ||
<groupId>${project.groupId}</groupId> | ||
<artifactId>rdf4j-rio-api</artifactId> | ||
<version>${project.version}</version> | ||
<exclusions> | ||
<exclusion> | ||
<groupId>com.fasterxml.jackson.core</groupId> | ||
<artifactId>jackson-annotations</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>com.fasterxml.jackson.core</groupId> | ||
<artifactId>jackson-core</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>com.fasterxml.jackson.core</groupId> | ||
<artifactId>jackson-databind</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>commons-codec</groupId> | ||
<artifactId>commons-codec</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>no.hasmac</groupId> | ||
<artifactId>hasmac-json-ld</artifactId> | ||
</exclusion> | ||
</exclusions> | ||
</dependency> | ||
<!-- No version is declared here; presumably it is managed by a dependencyManagement section in a parent pom. TODO confirm. --> | ||
<dependency> | ||
<groupId>com.opencsv</groupId> | ||
<artifactId>opencsv</artifactId> | ||
</dependency> | ||
</dependencies> | ||
</project> |
102 changes: 102 additions & 0 deletions
102
core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/CSVWParser.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
/******************************************************************************* | ||
* Copyright (c) 2024 Eclipse RDF4J contributors. | ||
* | ||
* All rights reserved. This program and the accompanying materials | ||
* are made available under the terms of the Eclipse Distribution License v1.0 | ||
* which accompanies this distribution, and is available at | ||
* http://www.eclipse.org/org/documents/edl-v10.php. | ||
* | ||
* SPDX-License-Identifier: BSD-3-Clause | ||
*******************************************************************************/ | ||
package org.eclipse.rdf4j.rio.csvw; | ||
|
||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.io.Reader; | ||
import java.util.Set; | ||
import java.util.logging.Level; | ||
import java.util.logging.Logger; | ||
|
||
import org.eclipse.rdf4j.model.Model; | ||
import org.eclipse.rdf4j.model.Resource; | ||
import org.eclipse.rdf4j.model.Statement; | ||
import org.eclipse.rdf4j.model.Value; | ||
import org.eclipse.rdf4j.model.impl.LinkedHashModel; | ||
import org.eclipse.rdf4j.model.util.Models; | ||
import org.eclipse.rdf4j.model.util.RDFCollections; | ||
import org.eclipse.rdf4j.model.vocabulary.CSVW; | ||
import org.eclipse.rdf4j.rio.ParserConfig; | ||
import org.eclipse.rdf4j.rio.RDFFormat; | ||
import org.eclipse.rdf4j.rio.RDFHandlerException; | ||
import org.eclipse.rdf4j.rio.RDFParseException; | ||
import org.eclipse.rdf4j.rio.Rio; | ||
import org.eclipse.rdf4j.rio.csvw.parsers.CellParserFactory; | ||
import org.eclipse.rdf4j.rio.csvw.parsers.Parser; | ||
import org.eclipse.rdf4j.rio.helpers.AbstractRDFParser; | ||
import org.eclipse.rdf4j.rio.helpers.JSONLDSettings; | ||
|
||
/** | ||
* Experimental Rio parser for CSV on the Web (CSVW). | ||
* @author Bart Hanssens | ||
* | ||
* @since 5.1.0 | ||
*/ | ||
public class CSVWParser extends AbstractRDFParser { | ||
|
||
@Override | ||
public RDFFormat getRDFFormat() { | ||
return RDFFormat.CSVW; | ||
} | ||
|
||
@Override | ||
public synchronized void parse(InputStream in, String baseURI) | ||
throws IOException, RDFParseException, RDFHandlerException { | ||
Model metadata = parseMetadata(in, null, baseURI); | ||
System.err.println(metadata); | ||
|
||
Iterable<Statement> statements = metadata.getStatements(null, CSVW.TABLE_SCHEMA, null); | ||
for (Statement s : statements) { | ||
Value obj = s.getObject(); | ||
Model cols = RDFCollections.getCollection(metadata, (Resource) obj, new LinkedHashModel()); | ||
metadata.getStatements((Resource) obj, null, null).forEach(a -> { | ||
System.err.println(a); | ||
} | ||
); | ||
|
||
Parser p = new Parser(); | ||
} | ||
clear(); | ||
} | ||
|
||
@Override | ||
public void parse(Reader reader, String baseURI) | ||
throws IOException, RDFParseException, RDFHandlerException { | ||
Model metadata = parseMetadata(null, reader, baseURI); | ||
|
||
clear(); | ||
} | ||
|
||
/** | ||
* Parse JSON-LD metadata | ||
* | ||
* @param in | ||
* @param reader | ||
* @param baseURI | ||
* @return | ||
* @throws IOException | ||
*/ | ||
private Model parseMetadata(InputStream in, Reader reader, String baseURI) throws IOException { | ||
Model metadata = null; | ||
ParserConfig cfg = new ParserConfig(); | ||
|
||
if (in != null) { | ||
metadata = Rio.parse(in, null, RDFFormat.JSONLD, cfg); | ||
System.err.println(metadata); | ||
} | ||
|
||
// if (reader != null) { | ||
// return Rio.parse(reader, baseURI, RDFFormat.JSONLD, cfg); | ||
// } | ||
return metadata; | ||
} | ||
} |
39 changes: 39 additions & 0 deletions
39
core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/CSVWParserFactory.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
/******************************************************************************* | ||
* Copyright (c) 2024 Eclipse RDF4J contributors. | ||
* | ||
* All rights reserved. This program and the accompanying materials | ||
* are made available under the terms of the Eclipse Distribution License v1.0 | ||
* which accompanies this distribution, and is available at | ||
* http://www.eclipse.org/org/documents/edl-v10.php. | ||
* | ||
* SPDX-License-Identifier: BSD-3-Clause | ||
*******************************************************************************/ | ||
package org.eclipse.rdf4j.rio.csvw; | ||
|
||
import org.eclipse.rdf4j.rio.RDFFormat; | ||
import org.eclipse.rdf4j.rio.RDFParserFactory; | ||
|
||
/** | ||
* An {@link RDFParserFactory} for CSV on the Web parsers. | ||
* | ||
* @author Bart Hanssens | ||
* | ||
* @since 5.1.0 | ||
*/ | ||
public class CSVWParserFactory implements RDFParserFactory { | ||
/** | ||
* Returns {@link RDFFormat#CSVW}. | ||
* | ||
* @return the RDF format supported by the parsers this factory creates | ||
*/ | ||
@Override | ||
public RDFFormat getRDFFormat() { | ||
return RDFFormat.CSVW; | ||
} | ||
|
||
/** | ||
* Returns a new instance of {@link CSVWParser}. | ||
* | ||
* @return a freshly constructed parser; a new object is created on every call | ||
*/ | ||
@Override | ||
public CSVWParser getParser() { | ||
return new CSVWParser(); | ||
} | ||
} |
42 changes: 42 additions & 0 deletions
42
core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/CSVWParserSettings.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
/******************************************************************************* | ||
* Copyright (c) 2024 Eclipse RDF4J contributors. | ||
* | ||
* All rights reserved. This program and the accompanying materials | ||
* are made available under the terms of the Eclipse Distribution License v1.0 | ||
* which accompanies this distribution, and is available at | ||
* http://www.eclipse.org/org/documents/edl-v10.php. | ||
* | ||
* SPDX-License-Identifier: BSD-3-Clause | ||
*******************************************************************************/ | ||
package org.eclipse.rdf4j.rio.csvw; | ||
|
||
import org.eclipse.rdf4j.rio.RioSetting; | ||
import org.eclipse.rdf4j.rio.helpers.BooleanRioSetting; | ||
|
||
/** | ||
* ParserSettings for the CSV on the Web parser features. | ||
* <p> | ||
* Several of these settings can be overridden by means of a system property, but only if specified at JVM startup time. | ||
* | ||
* @author Bart Hanssens | ||
* | ||
* @since 5.1.0 | ||
*/ | ||
public class CSVWParserSettings { | ||
|
||
/** | ||
* Boolean setting for parser to determine whether syntactically invalid lines in CSVW generate a parse error. | ||
* <p> | ||
* Defaults to true. | ||
* <p> | ||
* Can be overridden by setting system property {@code org.eclipse.rdf4j.rio.csvw.fail_on_invalid_lines} | ||
*/ | ||
public static final BooleanRioSetting FAIL_ON_INVALID_LINES = new BooleanRioSetting( | ||
"org.eclipse.rdf4j.rio.csvw.fail_on_invalid_lines", "Fail on CSVW invalid lines", Boolean.TRUE); | ||
|
||
/** | ||
* Private constructor: this class only holds setting constants and is not meant to be instantiated. | ||
*/ | ||
private CSVWParserSettings() { | ||
} | ||
} |
7 changes: 7 additions & 0 deletions
7
core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/package.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
<html> | ||
<head></head> | ||
<body> | ||
Parser for CSV on the Web. The parser adheres to the | ||
<a href="https://w3c.github.io/csvw/syntax/">editor's draft of 02 November 2022</a>. | ||
</body> | ||
</html> |
Oops, something went wrong.