diff --git a/arrow_python_utils/src/to_rust.rs b/arrow_python_utils/src/to_rust.rs index 8a3da69..a46837e 100644 --- a/arrow_python_utils/src/to_rust.rs +++ b/arrow_python_utils/src/to_rust.rs @@ -79,7 +79,8 @@ pub fn polars_df_to_rust_df(df: &PyAny) -> PyResult { } pub fn array_to_rust_df(rb: &[&PyAny]) -> PyResult { - let schema = rb.first() + let schema = rb + .first() .ok_or_else(|| ToRustError::Other("empty table".into()))? .getattr("schema")?; let names = schema.getattr("names")?.extract::>()?; diff --git a/maplib/src/mapping.rs b/maplib/src/mapping.rs index b893edf..8b7e2c5 100644 --- a/maplib/src/mapping.rs +++ b/maplib/src/mapping.rs @@ -129,8 +129,7 @@ impl Mapping { base_iri: Option, transient: bool, ) -> Result<(), MappingError> { - self - .triplestore + self.triplestore .read_triples(p, base_iri, transient) .map_err(MappingError::TriplestoreError) } diff --git a/triplestore/src/lib.rs b/triplestore/src/lib.rs index 04abba8..6cfc7c8 100644 --- a/triplestore/src/lib.rs +++ b/triplestore/src/lib.rs @@ -45,6 +45,7 @@ pub struct Triplestore { pub(crate) caching_folder: Option, df_map: HashMap>, transient_df_map: HashMap>, + parser_call: usize, } pub struct TripleTable { @@ -131,6 +132,7 @@ impl Triplestore { transient_df_map: HashMap::new(), deduplicated: true, caching_folder, + parser_call: 0, }) } diff --git a/triplestore/src/sparql/lazy_graph_patterns/join.rs b/triplestore/src/sparql/lazy_graph_patterns/join.rs index a79d7e2..8866b8f 100644 --- a/triplestore/src/sparql/lazy_graph_patterns/join.rs +++ b/triplestore/src/sparql/lazy_graph_patterns/join.rs @@ -54,7 +54,8 @@ impl Triplestore { let mut join_on: Vec<_> = left_solution_mappings .columns - .intersection(&right_columns).cloned() + .intersection(&right_columns) + .cloned() .collect(); join_on.sort(); diff --git a/triplestore/src/sparql/lazy_graph_patterns/left_join.rs b/triplestore/src/sparql/lazy_graph_patterns/left_join.rs index 2b6d628..a08fa8f 100644 --- a/triplestore/src/sparql/lazy_graph_patterns/left_join.rs +++ b/triplestore/src/sparql/lazy_graph_patterns/left_join.rs @@ -1,8 +1,6 @@ use super::Triplestore; use crate::sparql::errors::SparqlError; -use crate::sparql::multitype::{ - create_join_compatible_solution_mappings, join_workaround, -}; +use crate::sparql::multitype::{create_join_compatible_solution_mappings, join_workaround}; use crate::sparql::query_context::{Context, PathEntry}; use crate::sparql::solution_mapping::{is_string_col, SolutionMappings}; use log::debug; @@ -51,9 +49,7 @@ impl Triplestore { rdf_node_types: left_datatypes, } = left_solution_mappings; - let mut join_on: Vec<_> = left_columns - .intersection(&right_columns).cloned() - .collect(); + let mut join_on: Vec<_> = left_columns.intersection(&right_columns).cloned().collect(); join_on.sort(); let (mut left_mappings, mut left_datatypes, mut right_mappings, right_datatypes) = diff --git a/triplestore/src/sparql/lazy_graph_patterns/path.rs b/triplestore/src/sparql/lazy_graph_patterns/path.rs index 74d3a30..a4ca2c6 100644 --- a/triplestore/src/sparql/lazy_graph_patterns/path.rs +++ b/triplestore/src/sparql/lazy_graph_patterns/path.rs @@ -761,7 +761,9 @@ fn sparse_path( dt_obj: dt_obj_left, }) } - } else { res_right } + } else { + res_right + } } PropertyPathExpression::ZeroOrMore(inner) => { if let Some(SparsePathReturn { diff --git a/triplestore/src/sparql/lazy_graph_patterns/triple.rs b/triplestore/src/sparql/lazy_graph_patterns/triple.rs index ec209e5..8726798 100644 --- a/triplestore/src/sparql/lazy_graph_patterns/triple.rs +++ b/triplestore/src/sparql/lazy_graph_patterns/triple.rs @@ -124,7 +124,8 @@ impl Triplestore { { let overlap: Vec<_> = colnames .iter() - .filter(|x| columns.contains(*x)).cloned() + .filter(|x| columns.contains(*x)) + .cloned() .collect(); if height_0 { // Important that overlapping cols are dropped from mappings and not from lf, @@ -162,13 +163,8 @@ impl Triplestore { mappings = mappings.with_column(col(c).cast(DataType::Categorical(None))); } - mappings = join_workaround( - mappings, - &rdf_node_types, - lf, - &dts, - JoinType::Inner.into(), - ); + mappings = + join_workaround(mappings, &rdf_node_types, lf, &dts, JoinType::Inner.into()); } else { mappings = mappings.join(lf, [], [], JoinType::Cross.into()); } diff --git a/triplestore/src/sparql/multitype.rs b/triplestore/src/sparql/multitype.rs index 0c2c6d9..998cf8c 100644 --- a/triplestore/src/sparql/multitype.rs +++ b/triplestore/src/sparql/multitype.rs @@ -20,7 +20,6 @@ pub const MULTI_BLANK_DT: &str = "B"; pub const MULTI_PLACEHOLDER_LANG: &str = "?"; pub fn convert_lf_col_to_multitype(lf: LazyFrame, c: &str, dt: &RDFNodeType) -> LazyFrame { - match dt { RDFNodeType::IRI => lf.with_column( as_struct(vec![ @@ -420,8 +419,12 @@ pub fn split_df_multicols( } pub fn lf_printer(lf: &LazyFrame) { + let df = lf_destruct(lf); + println!("DF: {}", df); +} + +pub fn lf_destruct(lf: &LazyFrame) -> DataFrame { let df = lf.clone().collect().unwrap(); - println!("DF datatypes {:?}", df.dtypes()); let colnames: Vec<_> = df .get_column_names() .iter() @@ -466,7 +469,7 @@ pub fn lf_printer(lf: &LazyFrame) { series_vec.push(ser.clone()); } } - println!("DF: {}", DataFrame::new(series_vec).unwrap()); + DataFrame::new(series_vec).unwrap() } pub fn join_workaround( diff --git a/triplestore/src/sparql/solution_mapping.rs b/triplestore/src/sparql/solution_mapping.rs index de0043c..f27fc10 100644 --- a/triplestore/src/sparql/solution_mapping.rs +++ b/triplestore/src/sparql/solution_mapping.rs @@ -28,9 +28,7 @@ pub fn is_string_col(rdf_node_type: &RDFNodeType) -> bool { match rdf_node_type { RDFNodeType::IRI => true, RDFNodeType::BlankNode => true, - RDFNodeType::Literal(lit) => { - lit.as_ref() == xsd::STRING - } + RDFNodeType::Literal(lit) => lit.as_ref() == xsd::STRING, RDFNodeType::MultiType => false, RDFNodeType::None => false, } diff --git a/triplestore/src/triples_read.rs b/triplestore/src/triples_read.rs index fd8bf3c..c9bfc8e 100644 --- a/triplestore/src/triples_read.rs +++ b/triplestore/src/triples_read.rs @@ -25,6 +25,7 @@ impl Triplestore { ) -> Result<(), TriplestoreError> { //Copied from the documentation of rio_turtle let mut predicate_map = HashMap::new(); + let parser_call = self.parser_call.to_string(); let parse_func = &mut |t: rio_api::model::Triple| { let verb_key = t.predicate.iri; if !predicate_map.contains_key(verb_key) { @@ -39,8 +40,8 @@ impl Triplestore { } let (subjects, objects) = type_map.get_mut(&(types_tuple)).unwrap(); - subjects.push(rio_subject_to_oxrdf_subject(&t.subject)); - objects.push(rio_term_to_oxrdf_term(&t.object)); + subjects.push(rio_subject_to_oxrdf_subject(&t.subject, &parser_call)); + objects.push(rio_term_to_oxrdf_term(&t.object, &parser_call)); Ok(()) }; @@ -55,9 +56,7 @@ impl Triplestore { if path.extension() == Some("ttl".as_ref()) { let mut tparser = TurtleParser::new( - BufReader::new( - File::open(path).map_err(TriplestoreError::ReadTriplesFileError)?, - ), + BufReader::new(File::open(path).map_err(TriplestoreError::ReadTriplesFileError)?), base_iri, ); tparser @@ -121,18 +120,19 @@ impl Triplestore { }); } } + self.parser_call += 1; self.add_triples_vec(triples_to_add, &uuid::Uuid::new_v4().to_string(), transient)?; Ok(()) } } -fn rio_term_to_oxrdf_term(t: &rio_api::model::Term) -> oxrdf::Term { +fn rio_term_to_oxrdf_term(t: &rio_api::model::Term, parser_call: &str) -> oxrdf::Term { match t { rio_api::model::Term::NamedNode(nn) => { oxrdf::Term::NamedNode(rio_named_node_to_oxrdf_named_node(nn)) } rio_api::model::Term::BlankNode(bn) => { - oxrdf::Term::BlankNode(rio_blank_node_to_oxrdf_blank_node(bn)) + oxrdf::Term::BlankNode(rio_blank_node_to_oxrdf_blank_node(bn, parser_call)) } rio_api::model::Term::Literal(l) => oxrdf::Term::Literal(rio_literal_to_oxrdf_literal(l)), rio_api::model::Term::Triple(_) => { @@ -153,13 +153,13 @@ fn rio_literal_to_oxrdf_literal(l: &rio_api::model::Literal) -> oxrdf::Literal { } } -fn rio_subject_to_oxrdf_subject(s: &rio_api::model::Subject) -> oxrdf::Subject { +fn rio_subject_to_oxrdf_subject(s: &rio_api::model::Subject, parser_call: &str) -> oxrdf::Subject { match s { rio_api::model::Subject::NamedNode(nn) => { oxrdf::Subject::NamedNode(rio_named_node_to_oxrdf_named_node(nn)) } rio_api::model::Subject::BlankNode(bn) => { - oxrdf::Subject::BlankNode(rio_blank_node_to_oxrdf_blank_node(bn)) + oxrdf::Subject::BlankNode(rio_blank_node_to_oxrdf_blank_node(bn, parser_call)) } rio_api::model::Subject::Triple(_) => { todo!() @@ -167,8 +167,11 @@ fn rio_subject_to_oxrdf_subject(s: &rio_api::model::Subject) -> oxrdf::Subject { } } -fn rio_blank_node_to_oxrdf_blank_node(bn: &rio_api::model::BlankNode) -> oxrdf::BlankNode { - oxrdf::BlankNode::new_unchecked(bn.id) +fn rio_blank_node_to_oxrdf_blank_node( + bn: &rio_api::model::BlankNode, + parser_call: &str, +) -> oxrdf::BlankNode { + oxrdf::BlankNode::new_unchecked(format!("{}_{}", bn.id, parser_call)) } fn rio_named_node_to_oxrdf_named_node(nn: &rio_api::model::NamedNode) -> oxrdf::NamedNode {