diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 041d074..e36ca49 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.6.2 +current_version = 0.6.3 commit = True tag = False diff --git a/CITATION.cff b/CITATION.cff index c005318..e04ffc8 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -20,4 +20,4 @@ url: 'https://atomrdf.pyscal.org' license: "MIT" repository-code: https://github.com/pyscal/atomRDF type: software -version: 0.6.2 +version: 0.6.3 diff --git a/atomrdf/graph.py b/atomrdf/graph.py index be27c7a..01ef300 100644 --- a/atomrdf/graph.py +++ b/atomrdf/graph.py @@ -922,7 +922,7 @@ def unarchive( ontology=ontology, ) - def query(self, inquery): + def query(self, inquery, return_df=True): """ Query the graph using SPARQL @@ -931,6 +931,8 @@ def query(self, inquery): inquery: string SPARQL query to be executed + return_df: bool, optional + if True, returns the results as a pandas DataFrame. Default is True. Returns ------- res: pandas DataFrame @@ -938,11 +940,14 @@ def query(self, inquery): """ res = self.graph.query(inquery) if res is not None: - for line in inquery.split("\n"): - if "SELECT DISTINCT" in line: - break - labels = [x[1:] for x in line.split()[2:]] - return pd.DataFrame(res, columns=labels) + if return_df: + for line in inquery.split("\n"): + if "SELECT DISTINCT" in line: + break + labels = [x[1:] for x in line.split()[2:]] + return pd.DataFrame(res, columns=labels) + else: + return res raise ValueError("SPARQL query returned None") def auto_query( @@ -952,6 +957,7 @@ def auto_query( condition=None, return_query=False, enforce_types=None, + return_df=True, ): """ Automatically generates and executes a query based on the provided parameters. @@ -968,6 +974,8 @@ def auto_query( If True, returns the generated query instead of executing it. Defaults to False. enforce_types : bool, optional If provided, enforces the specified type for the query. Defaults to None. + return_df: bool, optional + if True, returns the results as a pandas DataFrame. Default is True. Returns ------- @@ -982,7 +990,7 @@ def auto_query( ) if return_query: return query - res = self.query(query) + res = self.query(query, return_df=return_df) if len(res) != 0: return res else: @@ -991,7 +999,7 @@ def auto_query( ) if return_query: return query - res = self.query(query) + res = self.query(query, return_df=return_df) return res diff --git a/atomrdf/network/network.py b/atomrdf/network/network.py index bfb2dff..237e545 100644 --- a/atomrdf/network/network.py +++ b/atomrdf/network/network.py @@ -378,16 +378,26 @@ def create_query(self, source, destinations, condition=None, enforce_types=True) The generated SPARQL query string. """ + #if not list, convert to list if not isinstance(destinations, list): destinations = [destinations] + #query name is how its called in SPARQL query source_name = source.query_name + + #same way we have to get destination names + #here a trick is applied: if it is a data property, we have to add "value" to the end, which is done in the query_name property + #now if it is an object property, the query has to end in the target class. destination_names = [] for destination in destinations: - if isinstance(destination, list): + if len(destination._parents) > 0: #this is a list, we need a stepped query - destination = [d.query_name for d in destination] - destination_names.append(destination) + destination_list = [] + for parent in destination._parents: + destination_list.append(parent.query_name) + destination_list.append(destination.query_name) + destination_names.append(destination_list) + destination._parents = [] else: destination_names.append([destination.query_name]) @@ -437,14 +447,13 @@ def create_query(self, source, destinations, condition=None, enforce_types=True) # now add corresponding triples for count, destination in enumerate(destination_names): for triple in all_triplets[str(count)]: - query.append( - " ?%s %s ?%s ." - % ( - self.strip_name(triple[0]), + #print(triple) + line_text = " ?%s %s ?%s ."% ( self.strip_name(triple[0]), triple[1], self.strip_name(triple[2]), ) - ) + if line_text not in query: + query.append(line_text) # we enforce types of the source and destination if enforce_types: diff --git a/atomrdf/network/ontology.py b/atomrdf/network/ontology.py index ca55da1..86e2fe9 100644 --- a/atomrdf/network/ontology.py +++ b/atomrdf/network/ontology.py @@ -48,6 +48,7 @@ def read_ontology(): "data_property", delimiter="#", namespace="rdfs", + rn = ['str'] ) combo.add_term( "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", diff --git a/atomrdf/network/parser.py b/atomrdf/network/parser.py index 5e815db..dce5715 100644 --- a/atomrdf/network/parser.py +++ b/atomrdf/network/parser.py @@ -124,7 +124,7 @@ def _parse_data_property(self): # assign this data for d in dm: if d != "owl:Thing": - self.attributes["class"][d].is_range_of.append(term.name) + self.attributes["class"][d].is_domain_of.append(term.name) # subproperties should be treated the same @@ -166,11 +166,11 @@ def _parse_object_property(self): for d in dm: if d != "07:owl#Thing": if d in self.attributes["class"]: - self.attributes["class"][d].is_range_of.append(term.name) + self.attributes["class"][d].is_domain_of.append(term.name) for r in rn: if r != "07:owl#Thing": - if d in self.attributes["class"]: - self.attributes["class"][d].is_domain_of.append(term.name) + if r in self.attributes["class"]: + self.attributes["class"][r].is_range_of.append(term.name) def _parse_class_basic(self): classes = [] diff --git a/atomrdf/network/term.py b/atomrdf/network/term.py index 558fa2c..4710bbe 100644 --- a/atomrdf/network/term.py +++ b/atomrdf/network/term.py @@ -75,6 +75,7 @@ def __init__( self._namespace = namespace # name of the class self._name = None + self._parents = [] @property def uri(self): @@ -189,6 +190,10 @@ def query_name(self): """ if self.node_type == "data_property": return self.name + "value" + elif self.node_type == "object_property": + if len(self.range) > 0: + #this has a domain + return self.range[0] return self.name @property @@ -240,6 +245,8 @@ def _is_data_node(self): raise TypeError( "This operation can only be performed with a data property!" ) + + def _create_condition_string(self, condition, val): return f'(?{self.query_name_without_prefix}{condition}"{val}"^^xsd:{self._clean_datatype(self.range[0])})' @@ -309,3 +316,7 @@ def __or__(self, term): def or_(self, term): self.__or__(term) + + def __rshift__(self, term): + term._parents.append(self) + return term diff --git a/setup.py b/setup.py index 870f0a9..29f862e 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name='atomrdf', - version='0.6.2', + version='0.6.3', author='Abril Azocar Guzman, Sarath Menon', author_email='sarath.menon@pyscal.org', description='Ontology based structural manipulation and quering',