From 76134b074a3ef471535ec0d1fcac38f4f6a56592 Mon Sep 17 00:00:00 2001 From: Sitt Min Oo Date: Tue, 11 Jun 2024 16:15:25 +0200 Subject: [PATCH 01/10] style (): Add rustfmt.toml to project root --- .rustfmt.toml | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 .rustfmt.toml diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 0000000..b62c6d4 --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1,10 @@ + +tab_spaces=4 +reorder_imports = true +imports_granularity = "Module" +group_imports = "StdExternalCrate" +struct_field_align_threshold = 20 +use_field_init_shorthand = true +enum_discrim_align_threshold = 20 +force_multiline_blocks = true +max_width = 80 From bec42a13fd2a3478ff18cf3003413696fd165e06 Mon Sep 17 00:00:00 2001 From: Sitt Min Oo Date: Wed, 10 Jul 2024 10:51:36 +0200 Subject: [PATCH 02/10] fix (translator/shexml): Fix errors in path and RDF quads pattern generation --- translator/src/shexml/mod.rs | 2 +- translator/src/shexml/operators/source.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/translator/src/shexml/mod.rs b/translator/src/shexml/mod.rs index 49c581d..ea41b5f 100644 --- a/translator/src/shexml/mod.rs +++ b/translator/src/shexml/mod.rs @@ -333,7 +333,7 @@ fn add_serializer_op_from_quads( }; let single_bgp = format!( - "?{} <{}{}> ?{} {}.", + "?{} <{}{}> ?{} {} .", subj_variable, pred_prefix_uri, pred.local, diff --git a/translator/src/shexml/operators/source.rs b/translator/src/shexml/operators/source.rs index 2e939ae..9189272 100644 --- a/translator/src/shexml/operators/source.rs +++ b/translator/src/shexml/operators/source.rs @@ -27,7 +27,7 @@ impl<'a> OperatorTranslator .values() .map(|source| { let mut config = HashMap::new(); - config.insert("url".to_string(), source.uri.clone()); + config.insert("path".to_string(), source.uri.clone()); let source_type_res = match &source.source_type { shexml_interpreter::SourceType::File => Ok(IOType::File), unsupported_type => { From 3357aa19df79ab31afebbfd815c1f9dbdca36bf7 Mon Sep 17 00:00:00 2001 From: Gerald Haesendonck Date: Tue, 16 Jul 2024 13:11:37 +0000 Subject: [PATCH 03/10] Conditionally build bindings --- .gitignore | 1 + .gitlab-ci.yml | 26 ++++++++++++++++++++++---- Cargo.toml | 6 +++--- README.md | 8 ++++++++ build.sh | 27 --------------------------- build_java.sh | 12 ++++++++++++ build_nodejs.sh | 16 ++++++++++++++++ build_python.sh | 12 ++++++++++++ src/java/Translator.class | Bin 1722 -> 1702 bytes src/java/test.sh | 2 +- src/lib.rs | 11 +++++++++-- 11 files changed, 84 insertions(+), 37 deletions(-) delete mode 100755 build.sh create mode 100755 build_java.sh create mode 100755 build_nodejs.sh create mode 100755 build_python.sh diff --git a/.gitignore b/.gitignore index 1f38312..03d5877 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ test_resources/ node_modules/ src/python/ltranslator.so +.idea/ \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 30bbde5..431aabf 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -4,13 +4,31 @@ test: script: - cargo test --verbose --jobs 1 -build: +build-no-bindings: + image: rustdocker/rust:stable + stage: build + script: + - cargo build + +build-java: image: rustdocker/rust:stable stage: build script: - apt update - apt install -y openjdk-17-jdk curl - - curl -fsSL https://deb.nodesource.com/setup_20.x | sudo bash - - - sudo apt-get install -y nodejs - - ./build.sh + - ./build_java.sh + +build-python: + image: rustdocker/rust:stable + stage: build + script: + - ./build_python.sh +build-nodejs: + image: rustdocker/rust:stable + stage: build + script: + - curl -fsSL https://deb.nodesource.com/setup_20.x | sudo bash - + - apt update + - sudo apt-get install -y nodejs + - ./build_nodejs.sh diff --git a/Cargo.toml b/Cargo.toml index 04cf0d2..4674ded 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,9 +47,9 @@ walkdir = "2.4.0" colored = "2.0.4" log4rs.workspace = true log.workspace = true -jni = "0.21.1" -neon = "1.0.0" -pyo3 = { version = "0.21.2", features = ["extension-module"] } +jni = { version = "0.21.1" , optional = true} +neon = { version = "1.0.0", optional = true } +pyo3 = { version = "0.21.2", features = ["extension-module"], optional = true } [lib] name = "ltranslator" diff --git a/README.md b/README.md index daa8863..880fce0 100644 --- a/README.md +++ b/README.md @@ -96,6 +96,14 @@ The following features are not supported in translation yet: 4) Functions 5) Conditionals +## Bindings +AlgeMapLoom provides bindings for Java, Python and Node.js. +These can be enabled with the features `jni`, `pyo3` and `neon` respectively. + +If you build from source, you can run the `build_java.sh`, `build_python.sh` and `build_nodejs.sh` +scripts respectively. + +For usage, check out the [src/java](src/java), [src/python](src/python), and [src/nodejs](src/nodejs) folders. ## Acknowledgement diff --git a/build.sh b/build.sh deleted file mode 100755 index 6e08c99..0000000 --- a/build.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/sh -set -e - -# Rust build -echo "==> Rust building and bindings" -cargo build - -# Java bindings -echo "==> Java bindings" -# Compile Translator CLI -javac src/java/Translator.java - -# NodeJS bindings -echo "==> NodeJS bindings" -# Install NodeJS NEON dependencies -npm i -# Execute NEON on Rust library to generate index.node file for NodeJS dynamic library -./node_modules/.bin/neon dist -n translator -v -f target/debug/libltranslator.so -# Move index.node to the right folder after generation -mv index.node src/nodejs - -# Python bindings -echo "==> Python bindings" -# Native import, but requires renaming -cp target/debug/libltranslator.so src/python/ltranslator.so - -echo "Done!" diff --git a/build_java.sh b/build_java.sh new file mode 100755 index 0000000..ccadbf6 --- /dev/null +++ b/build_java.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +set -e + +# Rust build +echo "==> Rust building and bindings" +cargo build --lib --release --features=jni + +# Java bindings +echo "==> Java bindings" +# Compile Translator CLI +javac src/java/Translator.java diff --git a/build_nodejs.sh b/build_nodejs.sh new file mode 100755 index 0000000..e5cfa91 --- /dev/null +++ b/build_nodejs.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +set -e + +# Rust build +echo "==> Rust building and bindings" +cargo build --lib --release --features=neon + +# NodeJS bindings +echo "==> NodeJS bindings" +# Install NodeJS NEON dependencies +npm i +# Execute NEON on Rust library to generate index.node file for NodeJS dynamic library +./node_modules/.bin/neon dist -n translator -v -f target/release/libltranslator.so +# Move index.node to the right folder after generation +mv index.node src/nodejs diff --git a/build_python.sh b/build_python.sh new file mode 100755 index 0000000..1917fa6 --- /dev/null +++ b/build_python.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +set -e + +# Rust build +echo "==> Rust building and bindings" +cargo build --lib --release --features=pyo3 + +# Python bindings +echo "==> Python bindings" +# Native import, but requires renaming +cp target/release/libltranslator.so src/python/ltranslator.so diff --git a/src/java/Translator.class b/src/java/Translator.class index 9c2e5d906862050c83fc320c4abe9913eb96874f..4c53832e92c46e8660a292a011a051f9eded05d2 100644 GIT binary patch delta 577 zcmW-d%Wo246vcnv48zcI8qzjC8X9X!2&I(LTB`!BFO*6}d{m0HJ~|qLC=LlT8h8E; zW@2LEUe~y28Wt|PbfbIs{tqVBcwupK?mgeh{m!}l(a)pS_kaCwz#6+f>p-Q(rJuLS zu%I~p%6Zu+cHC!)SY*lM zw9PVSg1ojmeb(k2=M|yG+mJJ zukHAqu0@oqHrHg|FKt^@f)#ytEE;Ch;vO}Vt%2cs@KZm~BWwpB_2f~7U3T3@x7c<( zui=?IR3w7GdO|mO9Q+7nCh};K6+%)5VHBkxY8*?g%RHEbRhHakM!FHMQx<&*R8-W4 zaGjuPEKM$__lSRCzWSA9rcWC7$ZGqn1fPu9VovA_8DVIMU*4zK6GkzJYz@ybf-xtp&Ml!dZgWR4B9gmO_jxcBDzWrP HEKU9g{{~_q delta 658 zcmXw$%~BIV6otReOz5N?1VSlLQj}6g2q7T;AipZ&U!sBvLWqit)Tq&zl*wSZ;~}Pe z1zpDlm12P<3l}_!Z{PyTJ5d*1r_Z@(Zl8PSOY3f{IXK$g1LoMQn|;-7&2rE4hB9s)+mlh!>}g8xHdf zDXhwtALEwdSm}SgQZ-_@<8W7AI*6>P#o(;9VZ~Sp3id!pTMK!^8U@YMF!ZzFv$cOh z;bhBYuR33HYc;Q?d7(%KKdhvsaf3ZO(>Y5-<^)?#lok}(;K)9i$_WpdsLF)7geWE!M*8HF7#L>f$_f7120+&*Os(IM%vzYjN< z5&v99e06!>Fv29sCg!E{Bo7ENPoaqs!b~~B|I#Wcj*GLMUg62$SG03GOSgP+ibta5 z1=j^%(uc=s-b(%t^h+I4S`O*e7>(}8X_D)jg+m!m;J5UBC>~oTPZE<}&5elC-`s`l sByqFB?Ybzcna4Eu7biL-aV#E7A;J>Nf-$kI%6Y Date: Fri, 2 Aug 2024 13:07:30 +0200 Subject: [PATCH 04/10] fix (translator/rml): Add base iri extraction to the Document struct --- rml-interpreter/src/extractors/io.rs | 26 ++++++++++++++++++++++---- rml-interpreter/src/rml_model/mod.rs | 3 ++- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/rml-interpreter/src/extractors/io.rs b/rml-interpreter/src/extractors/io.rs index 521c861..785ea9a 100644 --- a/rml-interpreter/src/extractors/io.rs +++ b/rml-interpreter/src/extractors/io.rs @@ -1,5 +1,5 @@ use std::fs::File; -use std::io::{BufRead, BufReader}; +use std::io::{BufRead, BufReader, Read, Seek}; use std::path::PathBuf; use sophia_api::triple::stream::TripleSource; @@ -11,6 +11,10 @@ use super::triplesmap_extractor::extract_triples_maps; use super::ExtractorResult; use crate::rml_model::Document; +fn extract_base_iri(input: &str) -> Option { + input.strip_prefix("@base").map(|e| e.to_string()) +} + pub fn load_graph_bread(buf_read: impl BufRead) -> ExtractorResult { match turtle::parse_bufread(buf_read).collect_triples() { Ok(it) => Ok(it), @@ -38,7 +42,11 @@ pub fn load_graph_str(input_str: &str) -> ExtractorResult { pub fn parse_str(input_str: &str) -> ExtractorResult { let graph = load_graph_str(input_str)?; let triples_maps = extract_triples_maps(&graph)?; - Ok(Document { triples_maps }) + let base_iri = input_str.split('\n').filter_map(extract_base_iri).next(); + Ok(Document { + triples_maps, + default_base_iri: base_iri, + }) } pub fn parse_file(path: PathBuf) -> ExtractorResult { @@ -50,9 +58,19 @@ pub fn parse_file(path: PathBuf) -> ExtractorResult { ))); } - let buf_read = BufReader::new(File::open(path)?); + let buf_read = BufReader::new(File::open(path.clone())?); let triples_maps = extract_triples_maps(&load_graph_bread(buf_read)?)?; - return Ok(Document { triples_maps }); + + // TODO: Refactor extraction of base iri from RML file <02-08-24, SMO> // + let mut buf_read = BufReader::new(File::open(path)?); + let mut input_string = String::default(); + buf_read.read_to_string(&mut input_string)?; + let base_iri = extract_base_iri(&input_string); + + return Ok(Document { + triples_maps, + default_base_iri: base_iri, + }); } Err(ParseError::IOErrorStr(format!( diff --git a/rml-interpreter/src/rml_model/mod.rs b/rml-interpreter/src/rml_model/mod.rs index 3730fa0..6577975 100644 --- a/rml-interpreter/src/rml_model/mod.rs +++ b/rml-interpreter/src/rml_model/mod.rs @@ -7,7 +7,8 @@ pub mod term_map; #[derive(Debug, Clone)] pub struct Document { - pub triples_maps: Vec, + pub default_base_iri: Option, + pub triples_maps: Vec, } #[derive(Debug, Clone)] From 05cf90773e26b596deaab456558c206f61404407 Mon Sep 17 00:00:00 2001 From: Sitt Min Oo Date: Fri, 2 Aug 2024 14:53:08 +0200 Subject: [PATCH 05/10] fix (shexml-interpreter): Re-add removed imports for test cases --- shexml-interpreter/src/parser/tests.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/shexml-interpreter/src/parser/tests.rs b/shexml-interpreter/src/parser/tests.rs index 78d88d0..a3d460e 100644 --- a/shexml-interpreter/src/parser/tests.rs +++ b/shexml-interpreter/src/parser/tests.rs @@ -1,6 +1,9 @@ #[cfg(test)] use std::collections::HashSet; +use chumsky::prelude::*; +use crate::*; + fn assert_parse_expected( parsed_items: Option, expected_items: Option, From d8215765a9e1fd13626d543aacbad4f0765cd766 Mon Sep 17 00:00:00 2001 From: Sitt Min Oo Date: Fri, 2 Aug 2024 15:52:02 +0200 Subject: [PATCH 06/10] feat (translator): Add base iri values to IRI struct to enable usage of base iris --- operator/src/lib.rs | 1 + translator/src/rmlalgebra/mod.rs | 15 +++++++++++---- translator/src/rmlalgebra/operators/extend.rs | 19 +++++++++++++++---- translator/src/shexml/mod.rs | 1 + .../src/shexml/operators/extend/term.rs | 1 + 5 files changed, 29 insertions(+), 8 deletions(-) diff --git a/operator/src/lib.rs b/operator/src/lib.rs index 64157b3..584777f 100644 --- a/operator/src/lib.rs +++ b/operator/src/lib.rs @@ -307,6 +307,7 @@ pub enum Function { inner_function: RcExtendFunction, }, Iri { + base_iri: Option, inner_function: RcExtendFunction, }, Literal { diff --git a/translator/src/rmlalgebra/mod.rs b/translator/src/rmlalgebra/mod.rs index b760a10..4795bb9 100644 --- a/translator/src/rmlalgebra/mod.rs +++ b/translator/src/rmlalgebra/mod.rs @@ -33,6 +33,7 @@ pub struct OptimizedRMLDocumentTranslator; impl LanguageTranslator for OptimizedRMLDocumentTranslator { fn translate_to_plan(doc: Document) -> crate::LanguageTranslateResult { + let base_iri = doc.default_base_iri.clone(); let mut plan = Plan::<()>::new(); let tm_projected_pairs_res: Result, PlanError> = doc @@ -92,6 +93,7 @@ impl LanguageTranslator for OptimizedRMLDocumentTranslator { sm_ref, &search_map, plan, + &base_iri )?; } @@ -101,6 +103,7 @@ impl LanguageTranslator for OptimizedRMLDocumentTranslator { sm_ref, &search_map, plan, + &base_iri )?; } Ok::<(), PlanError>(()) @@ -110,7 +113,7 @@ impl LanguageTranslator for OptimizedRMLDocumentTranslator { let sm_ref = &tm.subject_map; let poms = tm.po_maps.clone(); - add_non_join_related_ops(&poms, sm_ref, &search_map, plan)?; + add_non_join_related_ops(&poms, sm_ref, &search_map, plan, &base_iri)?; Ok::<(), PlanError>(()) })?; @@ -151,6 +154,7 @@ fn add_non_join_related_ops( sm: &SubjectMap, search_map: &SearchMap, plan: &RcRefCellPlan, + base_iri: &Option, ) -> Result<(), PlanError> { if no_join_poms.is_empty() & sm.classes.is_empty() { return Ok(()); @@ -164,7 +168,7 @@ fn add_non_join_related_ops( tms.push(&sm.tm_info); tms.extend(extract_gm_tm_infos(sm, no_join_poms)); - let extend_translator = ExtendTranslator { tms, variable_map }; + let extend_translator = ExtendTranslator { tms, variable_map, base_iri: base_iri.clone() }; let extend_op = extend_translator.translate(); let extended_plan = plan.apply(&extend_op, "ExtendOp")?; let mut next_plan = extended_plan; @@ -206,6 +210,7 @@ fn add_join_related_ops( sm: &SubjectMap, search_map: &SearchMap, plan: &RcRefCellPlan, + base_iri: &Option ) -> Result<(), PlanError> { // HashMap pairing the attribute with the function generated from // PTM's subject map @@ -270,6 +275,7 @@ fn add_join_related_ops( extract_extend_function_from_term_map_info( variable_map, &ptm_sm_info, + base_iri ); let om_extend_attr = variable_map.get(&om.tm_info.identifier).unwrap().clone(); @@ -281,7 +287,7 @@ fn add_join_related_ops( }]; let mut extend_pairs = - translate_extend_pairs(variable_map, sm, &pom_with_joined_ptm); + translate_extend_pairs(variable_map, sm, &pom_with_joined_ptm, base_iri); extend_pairs.insert(om_extend_attr, ptm_sub_function); @@ -521,12 +527,13 @@ mod tests { let variable_map = &generate_variable_map(&Document { triples_maps: triples_map_vec, + default_base_iri: None, }); let mut tms = vec![&triples_map.subject_map.tm_info]; let tms_poms = extract_tm_infos_from_poms(&triples_map.po_maps); tms.extend(tms_poms); - let extend_translator = ExtendTranslator { tms, variable_map }; + let extend_translator = ExtendTranslator { tms, variable_map, base_iri: None }; let extend_op = extend_translator.translate(); println!("{:#?}", extend_op); Ok(()) diff --git a/translator/src/rmlalgebra/operators/extend.rs b/translator/src/rmlalgebra/operators/extend.rs index f2b1425..3d792af 100644 --- a/translator/src/rmlalgebra/operators/extend.rs +++ b/translator/src/rmlalgebra/operators/extend.rs @@ -16,6 +16,7 @@ use crate::OperatorTranslator; pub struct ExtendTranslator<'a> { pub tms: Vec<&'a TermMapInfo>, pub variable_map: &'a HashMap, + pub base_iri: Option, } impl<'a> OperatorTranslator for ExtendTranslator<'a> { @@ -26,6 +27,7 @@ impl<'a> OperatorTranslator for ExtendTranslator<'a> { extract_extend_function_from_term_map_info( self.variable_map, tm_info, + &self.base_iri, ); extend_pairs.insert(variable, function); } @@ -39,8 +41,9 @@ impl<'a> OperatorTranslator for ExtendTranslator<'a> { pub fn extract_extend_function_from_term_map_info( variable_map: &HashMap, tm_info: &TermMapInfo, + base_iri: &Option, ) -> (String, Function) { - let func = extract_function(tm_info); + let func = extract_function(tm_info, base_iri); ( variable_map.get(&tm_info.identifier).unwrap().to_string(), @@ -48,7 +51,10 @@ pub fn extract_extend_function_from_term_map_info( ) } -fn extract_function(tm_info: &TermMapInfo) -> Function { +fn extract_function( + tm_info: &TermMapInfo, + base_iri: &Option, +) -> Function { let term_value = tm_info.term_value.value().to_string(); let value_function: RcExtendFunction = match tm_info.term_map_type { TermMapType::Constant => { @@ -73,7 +79,10 @@ fn extract_function(tm_info: &TermMapInfo) -> Function { .param_om_pairs .iter() .map(|(param, om)| { - (param.to_string(), extract_function(&om.tm_info).into()) + ( + param.to_string(), + extract_function(&om.tm_info, base_iri).into(), + ) }) .collect(); @@ -88,6 +97,7 @@ fn extract_function(tm_info: &TermMapInfo) -> Function { match tm_info.term_type.unwrap() { sophia_api::term::TermKind::Iri => { Function::Iri { + base_iri: base_iri.clone(), inner_function: Function::UriEncode { inner_function: value_function, } @@ -114,6 +124,7 @@ pub fn translate_extend_pairs( variable_map: &HashMap, sm: &SubjectMap, poms: &[PredicateObjectMap], + base_iri: &Option ) -> HashMap { let mut tm_infos = extract_tm_infos_from_poms(poms); tm_infos.push(&sm.tm_info); @@ -122,7 +133,7 @@ pub fn translate_extend_pairs( tm_infos .into_iter() .map(|tm_info| { - extract_extend_function_from_term_map_info(variable_map, tm_info) + extract_extend_function_from_term_map_info(variable_map, tm_info, base_iri) }) .collect() } diff --git a/translator/src/shexml/mod.rs b/translator/src/shexml/mod.rs index 3211d5b..3b6beeb 100644 --- a/translator/src/shexml/mod.rs +++ b/translator/src/shexml/mod.rs @@ -237,6 +237,7 @@ fn add_rename_extend_op_from_quads( } } else { Function::Iri { + base_iri: None, inner_function: subj_term_func.into(), } }; diff --git a/translator/src/shexml/operators/extend/term.rs b/translator/src/shexml/operators/extend/term.rs index 9a97922..efda2a2 100644 --- a/translator/src/shexml/operators/extend/term.rs +++ b/translator/src/shexml/operators/extend/term.rs @@ -21,6 +21,7 @@ pub fn obj_lang_datatype_function( }) } else { Some(Function::Iri { + base_iri:None, inner_function: obj_inner_function.into(), }) } From 9244eb4b404f77cc11b27b9035784c5e49db85bc Mon Sep 17 00:00:00 2001 From: Sitt Min Oo Date: Fri, 2 Aug 2024 16:15:34 +0200 Subject: [PATCH 07/10] chore (config): Add rust-toolchain.toml to have a project standardized rust version for compiling --- rust-toolchain.toml | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 rust-toolchain.toml diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 0000000..d01d2ea --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,2 @@ +[toolchain] +channel="1.77.2" From 8b3c4ffb1c014acac7928ff63a046fba89f855aa Mon Sep 17 00:00:00 2001 From: Sitt Min Oo Date: Sat, 3 Aug 2024 00:21:21 +0200 Subject: [PATCH 08/10] fix (rml-interpreter): Fix correct parsing of base iri from buf reader --- rml-interpreter/src/extractors/io.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rml-interpreter/src/extractors/io.rs b/rml-interpreter/src/extractors/io.rs index 785ea9a..46e772c 100644 --- a/rml-interpreter/src/extractors/io.rs +++ b/rml-interpreter/src/extractors/io.rs @@ -12,7 +12,7 @@ use super::ExtractorResult; use crate::rml_model::Document; fn extract_base_iri(input: &str) -> Option { - input.strip_prefix("@base").map(|e| e.to_string()) + input.strip_prefix("@base").map(|e| e[0..e.len()-1].replace(['<', '>'], "").trim().to_string()) } pub fn load_graph_bread(buf_read: impl BufRead) -> ExtractorResult { @@ -65,7 +65,7 @@ pub fn parse_file(path: PathBuf) -> ExtractorResult { let mut buf_read = BufReader::new(File::open(path)?); let mut input_string = String::default(); buf_read.read_to_string(&mut input_string)?; - let base_iri = extract_base_iri(&input_string); + let base_iri = input_string.split('\n').filter_map(extract_base_iri).next(); return Ok(Document { triples_maps, From 815d2a0f29dac38baef2ebb8f6512e938ca71675 Mon Sep 17 00:00:00 2001 From: Sitt Min Oo Date: Mon, 5 Aug 2024 13:43:57 +0200 Subject: [PATCH 09/10] fix (rml-interpreter): Fixed inferring term type for a term map implicitly --- .../src/extractors/objectmap_extractor.rs | 21 +++++++++++++++++-- .../src/extractors/term_map_info_extractor.rs | 16 +++++++++++++- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/rml-interpreter/src/extractors/objectmap_extractor.rs b/rml-interpreter/src/extractors/objectmap_extractor.rs index d30a648..ef7cffe 100644 --- a/rml-interpreter/src/extractors/objectmap_extractor.rs +++ b/rml-interpreter/src/extractors/objectmap_extractor.rs @@ -7,7 +7,9 @@ use super::{ExtractorResult, FromVocab, TermMapExtractor}; use crate::extractors::store::{get_object, get_objects}; use crate::extractors::Extractor; use crate::rml_model::join::JoinCondition; -use crate::rml_model::term_map::{GraphMap, ObjectMap, TermMapInfo}; +use crate::rml_model::term_map::{ + GraphMap, ObjectMap, TermMapInfo, TermMapType, +}; use crate::IriString; fn extract_join_condition( @@ -93,8 +95,23 @@ impl TermMapExtractor for ObjectMap { let mut tm_info = tm_info_res?; if tm_info.term_type.is_none() { - tm_info.term_type = Some(tm_info.term_value.kind()); + let mut inferred_term_type = match tm_info.term_map_type { + TermMapType::Reference => Some(TermKind::Literal), + TermMapType::Template => Some(TermKind::Iri), + _ => None, + }; + + if inferred_term_type.is_none() { + if language.is_some() || data_type.is_some() { + inferred_term_type = Some(TermKind::Literal); + } else { + inferred_term_type = Some(TermKind::Iri); + } + } + + tm_info.term_type = inferred_term_type; } + let graph_maps = GraphMap::extract_many_from_container(graph_ref, subj_ref)?; diff --git a/rml-interpreter/src/extractors/term_map_info_extractor.rs b/rml-interpreter/src/extractors/term_map_info_extractor.rs index 90be172..55c9bec 100644 --- a/rml-interpreter/src/extractors/term_map_info_extractor.rs +++ b/rml-interpreter/src/extractors/term_map_info_extractor.rs @@ -44,7 +44,10 @@ fn extract_term_map_type_value( )); } - let trip = results_query.pop().ok_or(ParseError::GenericError("Term map doesn't have rr:constant, rr:template, rr:reference, fnml:functionValue nor rr:column.".to_string()))?; + let trip = results_query + .pop() + .ok_or(ParseError::GenericError("Term map doesn't have rr:constant, rr:template, rr:reference, fnml:functionValue nor rr:column.".to_string()))?; + let fetched_pred = trip.p(); let term_map_type_res = match fetched_pred { @@ -79,6 +82,7 @@ impl Extractor for TermMapInfo { let mut term_type = None; + //Explicit term type casting trough rr:termtype predicate if let Ok(term_type_soph) = get_object(graph_ref, subj_ref, &term_type_pred) { @@ -100,6 +104,16 @@ impl Extractor for TermMapInfo { }; } + //Implicit term type derivation for constant-valued term maps + if term_map_type == TermMapType::Constant { + term_type = match term_value { + sophia_term::Term::Iri(_) => Some(TermKind::Iri), + sophia_term::Term::BNode(_) => Some(TermKind::BlankNode), + sophia_term::Term::Literal(_) => Some(TermKind::Literal), + sophia_term::Term::Variable(_) => None, + }; + } + let logical_target_iris = get_objects( graph_ref, subj_ref, From 46373c1a2a89eb3ce9aff04872f6ae654e01547a Mon Sep 17 00:00:00 2001 From: Sitt Min Oo Date: Mon, 5 Aug 2024 15:07:22 +0200 Subject: [PATCH 10/10] fix (translator/rml): Fix generation of template extend function with uriencoding --- translator/src/rmlalgebra/operators/extend.rs | 71 ++++++++++++++++--- 1 file changed, 60 insertions(+), 11 deletions(-) diff --git a/translator/src/rmlalgebra/operators/extend.rs b/translator/src/rmlalgebra/operators/extend.rs index 3d792af..456dd77 100644 --- a/translator/src/rmlalgebra/operators/extend.rs +++ b/translator/src/rmlalgebra/operators/extend.rs @@ -1,11 +1,13 @@ use std::collections::HashMap; +use lazy_static::lazy_static; use operator::{Extend, Function, Operator, RcExtendFunction}; +use regex::Regex; use rml_interpreter::rml_model::term_map::{ SubjectMap, TermMapInfo, TermMapType, }; use rml_interpreter::rml_model::PredicateObjectMap; -use sophia_api::term::TTerm; +use sophia_api::term::{TTerm, TermKind}; use crate::rmlalgebra::util::{ extract_gm_tm_infos, extract_tm_infos_from_poms, @@ -51,6 +53,54 @@ pub fn extract_extend_function_from_term_map_info( ) } +lazy_static! { + static ref TEMPLATE_REGEX: Regex = Regex::new(r"\{([^\\{\\}]*)\}").unwrap(); +} + +fn extract_template_function( + term_value: String, + term_type: &Option, +) -> Function { + let found_variables = TEMPLATE_REGEX + .captures_iter(&term_value) + .map(|c| c.extract()) + .map(|(_, [var])| var); + + let variable_function_pairs = if *term_type == Some(TermKind::Iri) { + found_variables + .map(|var| { + ( + var.to_string(), + Function::UriEncode { + inner_function: Function::Reference { + value: var.to_string(), + } + .into(), + } + .into(), + ) + }) + .collect() + } else { + found_variables + .map(|var| { + ( + var.to_string(), + Function::Reference { + value: var.to_string(), + } + .into(), + ) + }) + .collect() + }; + + Function::TemplateFunctionValue { + template: term_value, + variable_function_pairs, + } +} + fn extract_function( tm_info: &TermMapInfo, base_iri: &Option, @@ -68,9 +118,7 @@ fn extract_function( } } TermMapType::Template => { - Function::TemplateString { - value: term_value.clone(), - } + extract_template_function(term_value, &tm_info.term_type) } TermMapType::Function => { let fn_map = tm_info.fun_map_opt.as_ref().unwrap(); @@ -97,11 +145,8 @@ fn extract_function( match tm_info.term_type.unwrap() { sophia_api::term::TermKind::Iri => { Function::Iri { - base_iri: base_iri.clone(), - inner_function: Function::UriEncode { - inner_function: value_function, - } - .into(), + base_iri: base_iri.clone(), + inner_function: value_function, } } sophia_api::term::TermKind::Literal => { @@ -124,7 +169,7 @@ pub fn translate_extend_pairs( variable_map: &HashMap, sm: &SubjectMap, poms: &[PredicateObjectMap], - base_iri: &Option + base_iri: &Option, ) -> HashMap { let mut tm_infos = extract_tm_infos_from_poms(poms); tm_infos.push(&sm.tm_info); @@ -133,7 +178,11 @@ pub fn translate_extend_pairs( tm_infos .into_iter() .map(|tm_info| { - extract_extend_function_from_term_map_info(variable_map, tm_info, base_iri) + extract_extend_function_from_term_map_info( + variable_map, + tm_info, + base_iri, + ) }) .collect() }