Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make df optional #97

Merged
merged 11 commits into from
Apr 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.6.2
current_version = 0.6.3
commit = True
tag = False

Expand Down
2 changes: 1 addition & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ url: 'https://atomrdf.pyscal.org'
license: "MIT"
repository-code: https://github.com/pyscal/atomRDF
type: software
version: 0.6.2
version: 0.6.3
24 changes: 16 additions & 8 deletions atomrdf/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -922,7 +922,7 @@ def unarchive(
ontology=ontology,
)

def query(self, inquery):
def query(self, inquery, return_df=True):
"""
Query the graph using SPARQL

Expand All @@ -931,18 +931,23 @@ def query(self, inquery):
inquery: string
SPARQL query to be executed

return_df: bool, optional
if True, returns the results as a pandas DataFrame. Default is True.
Returns
-------
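res: pandas DataFrame or raw query result
pandas DataFrame of the results if return_df is True; otherwise the result object returned by the graph query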
"""
res = self.graph.query(inquery)
if res is not None:
for line in inquery.split("\n"):
if "SELECT DISTINCT" in line:
break
labels = [x[1:] for x in line.split()[2:]]
return pd.DataFrame(res, columns=labels)
if return_df:
for line in inquery.split("\n"):
if "SELECT DISTINCT" in line:
break
labels = [x[1:] for x in line.split()[2:]]
return pd.DataFrame(res, columns=labels)
else:
return res
raise ValueError("SPARQL query returned None")

def auto_query(
Expand All @@ -952,6 +957,7 @@ def auto_query(
condition=None,
return_query=False,
enforce_types=None,
return_df=True,
):
"""
Automatically generates and executes a query based on the provided parameters.
Expand All @@ -968,6 +974,8 @@ def auto_query(
If True, returns the generated query instead of executing it. Defaults to False.
enforce_types : bool, optional
If provided, enforces the specified type for the query. Defaults to None.
return_df: bool, optional
if True, returns the results as a pandas DataFrame. Default is True.

Returns
-------
Expand All @@ -982,7 +990,7 @@ def auto_query(
)
if return_query:
return query
res = self.query(query)
res = self.query(query, return_df=return_df)
if len(res) != 0:
return res
else:
Expand All @@ -991,7 +999,7 @@ def auto_query(
)
if return_query:
return query
res = self.query(query)
res = self.query(query, return_df=return_df)

return res

Expand Down
25 changes: 17 additions & 8 deletions atomrdf/network/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,16 +378,26 @@ def create_query(self, source, destinations, condition=None, enforce_types=True)
The generated SPARQL query string.

"""
#if not list, convert to list
if not isinstance(destinations, list):
destinations = [destinations]

#query name is how it is called in the SPARQL query
source_name = source.query_name

#same way we have to get destination names
#here a trick is applied: if it is a data property, we have to add "value" to the end, which is done in the query_name property
#now if it is an object property, the query has to end in the target class.
destination_names = []
for destination in destinations:
if isinstance(destination, list):
if len(destination._parents) > 0:
#this destination carries parent terms, so we need a stepped query
destination = [d.query_name for d in destination]
destination_names.append(destination)
destination_list = []
for parent in destination._parents:
destination_list.append(parent.query_name)
destination_list.append(destination.query_name)
destination_names.append(destination_list)
destination._parents = []
else:
destination_names.append([destination.query_name])

Expand Down Expand Up @@ -437,14 +447,13 @@ def create_query(self, source, destinations, condition=None, enforce_types=True)
# now add corresponding triples
for count, destination in enumerate(destination_names):
for triple in all_triplets[str(count)]:
query.append(
" ?%s %s ?%s ."
% (
self.strip_name(triple[0]),
#print(triple)
line_text = " ?%s %s ?%s ."% ( self.strip_name(triple[0]),
triple[1],
self.strip_name(triple[2]),
)
)
if line_text not in query:
query.append(line_text)

# we enforce types of the source and destination
if enforce_types:
Expand Down
1 change: 1 addition & 0 deletions atomrdf/network/ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def read_ontology():
"data_property",
delimiter="#",
namespace="rdfs",
rn = ['str']
)
combo.add_term(
"http://www.w3.org/1999/02/22-rdf-syntax-ns#type",
Expand Down
8 changes: 4 additions & 4 deletions atomrdf/network/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def _parse_data_property(self):
# assign this data
for d in dm:
if d != "owl:Thing":
self.attributes["class"][d].is_range_of.append(term.name)
self.attributes["class"][d].is_domain_of.append(term.name)

# subproperties should be treated the same

Expand Down Expand Up @@ -166,11 +166,11 @@ def _parse_object_property(self):
for d in dm:
if d != "07:owl#Thing":
if d in self.attributes["class"]:
self.attributes["class"][d].is_range_of.append(term.name)
self.attributes["class"][d].is_domain_of.append(term.name)
for r in rn:
if r != "07:owl#Thing":
if d in self.attributes["class"]:
self.attributes["class"][d].is_domain_of.append(term.name)
if r in self.attributes["class"]:
self.attributes["class"][r].is_range_of.append(term.name)

def _parse_class_basic(self):
classes = []
Expand Down
11 changes: 11 additions & 0 deletions atomrdf/network/term.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def __init__(
self._namespace = namespace
# name of the class
self._name = None
self._parents = []

@property
def uri(self):
Expand Down Expand Up @@ -189,6 +190,10 @@ def query_name(self):
"""
if self.node_type == "data_property":
return self.name + "value"
elif self.node_type == "object_property":
if len(self.range) > 0:
#this has a range, so the query name is the first range entry
return self.range[0]
return self.name

@property
Expand Down Expand Up @@ -240,6 +245,8 @@ def _is_data_node(self):
raise TypeError(
"This operation can only be performed with a data property!"
)



def _create_condition_string(self, condition, val):
return f'(?{self.query_name_without_prefix}{condition}"{val}"^^xsd:{self._clean_datatype(self.range[0])})'
Expand Down Expand Up @@ -309,3 +316,7 @@ def __or__(self, term):

def or_(self, term):
self.__or__(term)

def __rshift__(self, term):
term._parents.append(self)
return term
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setup(
name='atomrdf',
version='0.6.2',
version='0.6.3',
author='Abril Azocar Guzman, Sarath Menon',
author_email='[email protected]',
description='Ontology based structural manipulation and quering',
Expand Down
Loading