Skip to content

Commit

Permalink
Merge branch 'development' into 'main'
Browse files Browse the repository at this point in the history
Fix generation of iri's based on the data value and some errors in serialization

See merge request rml/proc/rmlweaver-js!3
  • Loading branch information
s-minoo committed Aug 6, 2024
2 parents 2ea4cfc + 87038a6 commit 5f3476b
Show file tree
Hide file tree
Showing 115 changed files with 417 additions and 307 deletions.
7 changes: 4 additions & 3 deletions .prettierrc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"tabWidth": 4,
"semi": false,
"singleQuote": true
"printWidth": 80,
"tabWidth": 4,
"semi": false,
"singleQuote": true
}
40 changes: 31 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,26 @@
# RMLWeaver-JS

**RMLWeaver-JS** is a Node.js tool designed to execute dot files using npm. Dot files are commonly used for describing directed graphs. This tool efficiently performs dot file operations using RxJS streams.
## Table of contents
<!--toc:start-->
- [RMLWeaver-JS](#rmlweaver-js)
- [Usage](#usage)
- [Execute a Dot File](#execute-a-dot-file)
- [Execute tests](#execute-tests)
- [Future works](#future-works)
- [Acknowledgement](#acknowledgement)
<!--toc:end-->

## Usage
---

**RMLWeaver-JS** is a Node.js algebraic mapping engine that works
in tandem with **[AlgeMapLoom-rs](https://github.com/RMLio/algemaploom-rs/)**.
RMLWeaver-JS executes the mapping processes described by the mapping plan
generated by AlgeMapLoom-rs.
Based on RxJS streams, RMLWeaver-JS maps heterogeneous data to RDF
knowledge graphs in a streaming manner.
This results in an overall lower memory usage.

## Usage

### Execute a Dot File

Expand All @@ -18,25 +35,30 @@ npm run execute_dot ${file}

To run in debug mode, add the `-debug` argument.
For example:

```
npm run execute_dot -- ${file} -debug
```

### Execute tests
RML test cases, for CSV input, are located under the folder [`./test/rml-mapper-test-cases-csv/`](./test/rml-mapper-test-cases-csv/).

ShExML test cases are located under the folder [`./test/shexml/`](./test/shexml/).

To execute the tests, use:

```
npm test
```

### Todo:
- LeftJoin and RightJoin.
- More file types for source and target.
- (Not Sure) Fix that extend operator does not push a value if the value for the template is missing. (Test 2c)
- Optimize code
- Write benchmarks.
## Future works:

- LeftJoin and RightJoin.
- More file types for source and target.
- (Not Sure) Fix that extend operator does not push a value if the value for the template is missing. (Test 2c)
- Optimize code
- Write benchmarks.

## Acknowledgement
This proof of concept algebraic mapping engine is implemented by [Tristan Verbeken](https://github.com/TR1VER)

This proof-of-concept algebraic mapping engine was implemented by [Tristan Verbeken](https://github.com/TR1VER).
130 changes: 94 additions & 36 deletions src/operator/extendOperator.js
Original file line number Diff line number Diff line change
@@ -1,45 +1,77 @@
import { Operator } from './Operator.js'
import Handlebars from 'handlebars'
import { BlankNode, Iri, LanguageDataType, Literal } from '../types.js'
import {
BlankNode,
DataTypedLiteral,
Iri,
LanguageLiteral,
Literal,
} from '../types.js'
import { forEach } from 'most'

export class ExtendOp extends Operator {
generateMapping(key, mapping) {
generateMapping(key, extend_func) {
// In function so recursion is possible

if (key.charAt(0) === '?') key = key.slice(1) // Remove ? When we have a *variable* attribute
let regex = /\{([^}]+)}/g // Match text between curly brackets.
//let regex = /[^\\]({[^}]+[^\\]})/g // Match text between curly brackets.
let regex = /({[^\\{\\}]*})/g
let left_escaped_curly_regex = /\\\\\{/g
let right_escaped_curly_regex = /\\\\}/g
const innerFunction =
mapping.inner_function != null
? this.generateMapping(key, mapping.inner_function)
extend_func.inner_function != null
? this.generateMapping(key, extend_func.inner_function)
: null

switch (mapping.type) {
switch (extend_func.type) {
case 'Iri':
return (obj) => {
innerFunction(obj)
obj[key] = new Iri(obj[key])
let iri_value = ''
if (
extend_func.base_iri != null &&
URL.canParse(obj[key], extend_func.base_iri) &&
!URL.canParse(obj[key], undefined)
) {
iri_value = extend_func.base_iri + obj[key]
} else {
iri_value = obj[key]
}
if ( iri_value.search(" ") >= 0 ) {
obj[key] = undefined
} else {
obj[key] = new Iri(iri_value)
}
}

case 'Literal':
const dtypeFunction =
mapping.dtype_function != null
? this.generateMapping(key, mapping.inner_function)
extend_func.dtype_function != null
? this.generateMapping(key, extend_func.dtype_function)
: null
const langtypeFunction =
mapping.langtype_function != null
? this.generateMapping(key, mapping.inner_function)
extend_func.langtype_function != null
? this.generateMapping(
key,
extend_func.langtype_function,
)
: null

return (obj) => {
innerFunction(obj)
let literal_value = obj[key]
let obj_value = new Literal(literal_value)
if (dtypeFunction !== null) {
dtypeFunction(obj)
}
if (langtypeFunction !== null) {
obj_value = new DataTypedLiteral(
literal_value,
obj[key],
)
} else if (langtypeFunction !== null) {
langtypeFunction(obj)
obj_value = new LanguageLiteral(literal_value, obj[key])
}
obj[key] = new Literal(obj[key])
obj[key] = obj_value
}

case 'BlankNode':
Expand All @@ -52,77 +84,103 @@ export class ExtendOp extends Operator {
return (obj) => {
innerFunction(obj)

obj[key] = encodeURI(obj[key])
obj[key] = encodeURIComponent(obj[key])
.replace(',', '%2C')
.replace('(', '%28')
.replace(')', '%29') // Encode URI, Maybe manually in the future to match RML mapper.
}

case 'Reference':
return (obj) => {
obj[key] = obj[mapping.value]
obj[key] = obj[extend_func.value]
}

case 'Constant':
return (obj) => {
obj[key] = mapping.value
obj[key] = extend_func.value
}

case 'TemplateString':
// Match text between curly brackets.
let template_string = mapping.value.replace(
regex,
(match, content) => `{{[${content}]}}`,
) // Double brackets for HandleBars.
let template = Handlebars.compile(template_string)
return (obj) => {
obj[key] = template(obj)
//let template_string = mapping.value.replace(
// regex,
// (match, content) => `{{[${content}]}}`,
//) // Double brackets for HandleBars.
//let template = Handlebars.compile(template_string)
let value = extend_func.value.replace(
left_escaped_curly_regex,
'\\{',
)
value = value.replace(right_escaped_curly_regex, '\\}')
return (sol_map) => {
let result = value.replace(
regex,
(match, captured, offset, full_string) => {
let key = captured.substring(1, captured.length - 1)
return sol_map[key]
},
)
result = result.replace(/\\{/g, '{')
result = result.replace(/\\}/g, '}')
sol_map[key] = result
}

case 'TemplateFunctionValue':
let template_string_2 = mapping.template.replace(
regex,
(match, content) => `{{[${content}]}}`,
) // Double brackets for HandleBars.
let template2 = Handlebars.compile(template_string_2)
let template = extend_func.template.replace(
left_escaped_curly_regex,
'\\{',
)
template = template.replace(right_escaped_curly_regex, '\\}')

let var_function_pairs = {}
for (let pair of mapping.variable_function_pairs) {
for (let pair of extend_func.variable_function_pairs) {
let variable = pair[0]
let ext_func = pair[1]
ext_func = this.generateMapping(key, ext_func)
var_function_pairs[variable] = ext_func
}

return (obj) => {
let temp_val = {};
let temp_val = {}
for (let variable in var_function_pairs) {
let nest_func = var_function_pairs[variable]
nest_func(obj)
temp_val[variable] = obj[key]
}
obj[key] = template2(temp_val)

let result = template.replace(
regex,
(match, captured, offset, full_string) => {
let key = captured.substring(1, captured.length - 1)
return temp_val[key]
},
)
result = result.replace(/\\{/g, '{')
result = result.replace(/\\}/g, '}')

obj[key] = result
}

case 'Concatenate':
const left_function = this.generateMapping(
key,
mapping.left_value,
extend_func.left_value,
)
const right_function = this.generateMapping(
key,
mapping.right_value,
extend_func.right_value,
)
return (obj) => {
left_function(obj)
const left_value = obj[key]
right_function(obj)
const right_value = obj[key]
obj[key] = left_value + mapping.separator + right_value
obj[key] = left_value + extend_func.separator + right_value
}

default:
throw Error(
`Type (${mapping.type}) found in extend operator not supported!`,
`Type (${extend_func.type}) found in extend operator not supported!`,
)
}
}
Expand Down
12 changes: 8 additions & 4 deletions src/operator/serializerOperator.js
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ export function generateQuadTemplate(template) {
const fields = template.split(' ')
const variables = [] // Hold the indices of fields containing variables. So we can change these later.
const tags = new Array(fields.length).fill('') // Hold the indices of fields containing language tags.
const values = fields.map((value, index) => {
let values = fields.map((value, index) => {
// extract langtag and datatpe using regex
const langTag = value.match(/@([a-zA-Z]+(-[a-zA-Z0-9]+)*)/)
const dataType = value.match(/\^\^<(.+)>/)
Expand Down Expand Up @@ -95,15 +95,19 @@ export function generateQuadTemplate(template) {
})
if (tags !== {}) {
return (obj) => {
let error_flag = false
variables.forEach((i) => {
if (obj[i[1]].render() === undefined) {
return null
if (
obj[i[1]] === undefined ||
obj[i[1]].render() === undefined
) {
error_flag = true
} else {
values[i[0]] = obj[i[1]].render() + tags[i[0]]
}
})

if (values.includes('')) {
if (values.includes('') || error_flag) {
return undefined
} else {
return values.join(' ') + ' .'
Expand Down
34 changes: 21 additions & 13 deletions src/types.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,6 @@ class DataType {
}
}

export class LanguageDataType extends DataType {
content = null
language = ''
constructor(content, language) {
super(content)
this.language = language
}
render() {
return this.content.render() + this.language
}
}

export class Iri extends DataType {
constructor(value) {
super(value)
Expand All @@ -36,7 +24,6 @@ export class Iri extends DataType {
return `<${this.value}>`
}
}

export class Literal extends DataType {
constructor(value) {
super(value)
Expand All @@ -49,6 +36,27 @@ export class Literal extends DataType {
return `"${this.value}"`
}
}

/**
 * Literal that additionally carries a datatype.
 *
 * Rendering appends `^^<datatype>` to whatever the parent Literal renders.
 */
export class DataTypedLiteral extends Literal {
    /**
     * @param value    forwarded unchanged to the Literal constructor
     * @param datatype stored on the instance and placed between the angle
     *                 brackets of the rendered suffix
     */
    constructor(value, datatype) {
        super(value)
        this.datatype = datatype
    }

    /**
     * @returns the parent rendering followed by `^^<datatype>`
     */
    render() {
        const lexicalForm = super.render()
        const datatypeSuffix = '^^<' + this.datatype + '>'
        return lexicalForm + datatypeSuffix
    }
}

/**
 * Literal that additionally carries a language tag.
 *
 * Rendering appends `@language` to whatever the parent Literal renders.
 */
export class LanguageLiteral extends Literal {
    /**
     * @param value    forwarded unchanged to the Literal constructor
     * @param language stored on the instance and emitted after the `@`
     */
    constructor(value, language) {
        super(value)
        this.language = language
    }

    /**
     * @returns the parent rendering followed by `@` and the language
     */
    render() {
        const lexicalForm = super.render()
        const languageSuffix = '@' + this.language
        return lexicalForm + languageSuffix
    }
}
export class BlankNode extends DataType {
constructor(value) {
super(value)
Expand Down
2 changes: 1 addition & 1 deletion test/rml-mapper-test-cases-csv/GTFS-case/mapping.dot
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ digraph {
1 [ label = "{\"id\":\"Projection_1\",\"operator\":{\"config\":{\"projection_attributes\":[\"stop_id\",\"parent_station\"]},\"type\":\"ProjectOp\"}}" ]
2 [ label = "{\"id\":\"Fragmenter_2\",\"operator\":{\"config\":{\"from\":\"default\",\"to\":[\"default\",\"join_0\",\"join_0\"]},\"type\":\"FragmentOp\"}}" ]
3 [ label = "{\"id\":\"Join_3\",\"operator\":{\"config\":{\"join_alias\":\"join_0\",\"join_type\":\"InnerJoin\",\"left_right_attr_pairs\":[[\"parent_station\",\"stop_id\"]],\"predicate_type\":\"Equal\"},\"type\":\"JoinOp\"}}" ]
4 [ label = "{\"id\":\"Extend_4\",\"operator\":{\"config\":{\"?tm0_o0_0\":{\"inner_function\":{\"inner_function\":{\"type\":\"TemplateString\",\"value\":\"http://transport.linkeddata.es/madrid/metro/stops/{join_0_stop_id}\"},\"type\":\"UriEncode\"},\"type\":\"Iri\"},\"?tm0_p0_0\":{\"inner_function\":{\"inner_function\":{\"type\":\"Constant\",\"value\":\"http://vocab.gtfs.org/terms#parentStation\"},\"type\":\"UriEncode\"},\"type\":\"Iri\"},\"?tm0_sm\":{\"inner_function\":{\"inner_function\":{\"type\":\"TemplateString\",\"value\":\"http://transport.linkeddata.es/madrid/metro/stops/{stop_id}\"},\"type\":\"UriEncode\"},\"type\":\"Iri\"}},\"type\":\"ExtendOp\"}}" ]
4 [ label = "{\"id\":\"Extend_4\",\"operator\":{\"config\":{\"?tm0_o0_0\":{\"base_iri\":null,\"inner_function\":{\"template\":\"http://transport.linkeddata.es/madrid/metro/stops/{join_0_stop_id}\",\"type\":\"TemplateFunctionValue\",\"variable_function_pairs\":[[\"join_0_stop_id\",{\"inner_function\":{\"type\":\"Reference\",\"value\":\"join_0_stop_id\"},\"type\":\"UriEncode\"}]]},\"type\":\"Iri\"},\"?tm0_p0_0\":{\"base_iri\":null,\"inner_function\":{\"type\":\"Constant\",\"value\":\"http://vocab.gtfs.org/terms#parentStation\"},\"type\":\"Iri\"},\"?tm0_sm\":{\"base_iri\":null,\"inner_function\":{\"template\":\"http://transport.linkeddata.es/madrid/metro/stops/{stop_id}\",\"type\":\"TemplateFunctionValue\",\"variable_function_pairs\":[[\"stop_id\",{\"inner_function\":{\"type\":\"Reference\",\"value\":\"stop_id\"},\"type\":\"UriEncode\"}]]},\"type\":\"Iri\"}},\"type\":\"ExtendOp\"}}" ]
5 [ label = "{\"id\":\"Serialize_5\",\"operator\":{\"config\":{\"format\":\"NQuads\",\"template\":\"?tm0_sm ?tm0_p0_0 ?tm0_o0_0 .\"},\"type\":\"SerializerOp\"}}" ]
6 [ label = "{\"id\":\"Sink_6\",\"operator\":{\"config\":{\"data_format\":\"NQuads\",\"target_type\":\"StdOut\"},\"type\":\"TargetOp\"}}" ]
0 -> 1 [ label = "{\"fragment\": default}" ]
Expand Down
2 changes: 1 addition & 1 deletion test/rml-mapper-test-cases-csv/GTFS-case/mapping.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"nodes":[{"id":"Source_0","operator":{"type":"SourceOp","config":{"path":"stops.csv","source_type":"File","root_iterator":{"reference":null,"reference_formulation":"CSVRows","fields":[],"alias":null}}}},{"id":"Projection_1","operator":{"type":"ProjectOp","config":{"projection_attributes":["stop_id","parent_station"]}}},{"id":"Fragmenter_2","operator":{"type":"FragmentOp","config":{"from":"default","to":["default","join_0","join_0"]}}},{"id":"Join_3","operator":{"type":"JoinOp","config":{"left_right_attr_pairs":[["parent_station","stop_id"]],"join_type":"InnerJoin","predicate_type":"Equal","join_alias":"join_0"}}},{"id":"Extend_4","operator":{"type":"ExtendOp","config":{"?tm0_o0_0":{"type":"Iri","inner_function":{"type":"UriEncode","inner_function":{"type":"TemplateString","value":"http://transport.linkeddata.es/madrid/metro/stops/{join_0_stop_id}"}}},"?tm0_p0_0":{"type":"Iri","inner_function":{"type":"UriEncode","inner_function":{"type":"Constant","value":"http://vocab.gtfs.org/terms#parentStation"}}},"?tm0_sm":{"type":"Iri","inner_function":{"type":"UriEncode","inner_function":{"type":"TemplateString","value":"http://transport.linkeddata.es/madrid/metro/stops/{stop_id}"}}}}}},{"id":"Serialize_5","operator":{"type":"SerializerOp","config":{"template":"?tm0_sm ?tm0_p0_0 ?tm0_o0_0 .","format":"NQuads"}}},{"id":"Sink_6","operator":{"type":"TargetOp","config":{"target_type":"StdOut","data_format":"NQuads"}}}],"node_holes":[],"edge_property":"directed","edges":[[0,1,{"fragment":"default"}],[1,2,{"fragment":"default"}],[2,3,{"fragment":"join_0"}],[2,3,{"fragment":"join_0"}],[3,4,{"fragment":"default"}],[4,5,{"fragment":"default"}],[5,6,{"fragment":"default"}]]}
{"nodes":[{"id":"Source_0","operator":{"type":"SourceOp","config":{"path":"stops.csv","source_type":"File","root_iterator":{"reference":null,"reference_formulation":"CSVRows","fields":[],"alias":null}}}},{"id":"Projection_1","operator":{"type":"ProjectOp","config":{"projection_attributes":["stop_id","parent_station"]}}},{"id":"Fragmenter_2","operator":{"type":"FragmentOp","config":{"from":"default","to":["default","join_0","join_0"]}}},{"id":"Join_3","operator":{"type":"JoinOp","config":{"left_right_attr_pairs":[["parent_station","stop_id"]],"join_type":"InnerJoin","predicate_type":"Equal","join_alias":"join_0"}}},{"id":"Extend_4","operator":{"type":"ExtendOp","config":{"?tm0_sm":{"type":"Iri","base_iri":null,"inner_function":{"type":"TemplateFunctionValue","template":"http://transport.linkeddata.es/madrid/metro/stops/{stop_id}","variable_function_pairs":[["stop_id",{"type":"UriEncode","inner_function":{"type":"Reference","value":"stop_id"}}]]}},"?tm0_p0_0":{"type":"Iri","base_iri":null,"inner_function":{"type":"Constant","value":"http://vocab.gtfs.org/terms#parentStation"}},"?tm0_o0_0":{"type":"Iri","base_iri":null,"inner_function":{"type":"TemplateFunctionValue","template":"http://transport.linkeddata.es/madrid/metro/stops/{join_0_stop_id}","variable_function_pairs":[["join_0_stop_id",{"type":"UriEncode","inner_function":{"type":"Reference","value":"join_0_stop_id"}}]]}}}}},{"id":"Serialize_5","operator":{"type":"SerializerOp","config":{"template":"?tm0_sm ?tm0_p0_0 ?tm0_o0_0 .","format":"NQuads"}}},{"id":"Sink_6","operator":{"type":"TargetOp","config":{"target_type":"StdOut","data_format":"NQuads"}}}],"node_holes":[],"edge_property":"directed","edges":[[0,1,{"fragment":"default"}],[1,2,{"fragment":"default"}],[2,3,{"fragment":"join_0"}],[2,3,{"fragment":"join_0"}],[3,4,{"fragment":"default"}],[4,5,{"fragment":"default"}],[5,6,{"fragment":"default"}]]}
Loading

0 comments on commit 5f3476b

Please sign in to comment.