Skip to content

Commit 62edeac

Browse files
authored
Merge pull request #18 from cgmeyer/develop
updates
2 parents 1d7ae74 + 96372ee commit 62edeac

File tree

1 file changed

+63
-1
lines changed

1 file changed

+63
-1
lines changed

expansion/expansion.py

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ def get_project_tsvs(self, projects=None, nodes=None, outdir='project_tsvs', ove
294294
return output
295295

296296
# Query Functions
297-
def paginate_query(self, node, project_id=None, props=['id','submitter_id'], chunk_size=10000, format='json',args=None):
297+
def paginate_query_old(self, node, project_id=None, props=['id','submitter_id'], chunk_size=10000, format='json',args=None):
298298
"""Function to paginate a query to avoid time-outs.
299299
Returns a json of all the records in the node.
300300
@@ -392,6 +392,68 @@ def paginate_query(self, node, project_id=None, props=['id','submitter_id'], chu
392392
else:
393393
return total
394394

395+
def paginate_query(self, node, project_id=None, props=None, args=None, chunk_size=5000, offset=0, format='json'):
    """Page through every record of a node to avoid query time-outs.

    The query is re-issued with an advancing ``offset`` until a page comes
    back with a length different from ``chunk_size``. When the response
    carries an ``'error'`` key the chunk size is halved and the same offset
    is retried.

    Args:
        node (str): The node to query.
        project_id (str): The project_id to limit the query to. Default is None.
        props (list): Extra properties of the node to return. 'id' and
            'submitter_id' are always requested.
        args (str): Put graphQL arguments here. For example,
            'with_path_to:{type:"case",submitter_id:"case-01"}', etc.
            Don't enclose in parentheses.
        chunk_size (int): The number of records to request per query.
            Default is 5000.
        offset (int): Index of the first record to request. Default is 0.
        format (str): 'tsv' returns ``json_normalize`` of the records; any
            other value returns the nested dictionary. Default is 'json'.

    Returns:
        ``{'data': {node: [records...]}}``, or the result of
        ``json_normalize`` on the record list when ``format`` is 'tsv'.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
        SystemExit: If the query still errors with a chunk_size of 1.

    Example:
        paginate_query('demographic')
    """
    if chunk_size < 1:
        # A zero-sized page would always "match" chunk_size and never end.
        raise ValueError("chunk_size must be at least 1, got {}".format(chunk_size))

    # De-duplicate while keeping a stable order so the query text is reproducible.
    wanted = ['id', 'submitter_id'] + list(props or [])
    properties = ' '.join(map(str, dict.fromkeys(wanted)))

    # Filters that do not change from one page to the next.
    fixed_filters = []
    if project_id is not None:
        fixed_filters.append('project_id:"%s"' % project_id)
    if args is not None:
        fixed_filters.append(str(args))

    records = []
    while True:
        # Rebuild the query on every pass: both offset and chunk_size change
        # inside this loop and must be reflected in what is sent.
        filters = ['first: %s' % chunk_size, 'offset: %s' % offset] + fixed_filters
        query_txt = """{%s (%s){%s}}""" % (node, ', '.join(filters), properties)

        res = self.sub.query(query_txt)

        if 'data' in res:
            page = res['data'][node]  # res['data'][node] should be a list
            records += page
            offset += chunk_size
            if len(page) != chunk_size:
                # A short page means there is nothing left to fetch.
                break

        elif 'error' in res:
            print(res['error'])
            if chunk_size > 1:
                chunk_size = int(chunk_size / 2)
                print("\tHalving chunk_size to: {}.".format(chunk_size))
            else:
                print("\tQuery timing out with chunk_size of 1!")
                raise SystemExit(1)

        else:
            # Unrecognised response: retrying the identical query cannot make
            # progress, so stop and return what has been collected so far.
            print("Query Error: {}".format(res))
            break

    total = {'data': {node: records}}
    print("\tTotal records retrieved: {}".format(len(records)))

    if format == 'tsv':
        return json_normalize(records)
    return total
455+
456+
395457
def get_uuids_in_node(self,node,project_id):
396458
"""
397459
This function returns a list of all the UUIDs of records

0 commit comments

Comments
 (0)