@@ -294,7 +294,7 @@ def get_project_tsvs(self, projects=None, nodes=None, outdir='project_tsvs', ove
294294 return output
295295
296296# Query Functions
297- def paginate_query (self , node , project_id = None , props = ['id' ,'submitter_id' ], chunk_size = 10000 , format = 'json' ,args = None ):
297+ def paginate_query_old (self , node , project_id = None , props = ['id' ,'submitter_id' ], chunk_size = 10000 , format = 'json' ,args = None ):
298298 """Function to paginate a query to avoid time-outs.
299299 Returns a json of all the records in the node.
300300
@@ -392,6 +392,68 @@ def paginate_query(self, node, project_id=None, props=['id','submitter_id'], chu
392392 else :
393393 return total
394394
def paginate_query(self, node, project_id=None, props=None, args=None, chunk_size=5000, offset=0, format='json'):
    """Page through every record of a node to avoid query time-outs.

    The query is re-issued with an advancing ``offset`` until a page shorter
    than ``chunk_size`` is returned. If the service answers with an
    ``'error'`` key, ``chunk_size`` is halved and the same offset is retried.
    A response with neither a ``'data'`` nor an ``'error'`` key is printed and
    pagination stops, returning the records collected so far.

    Args:
        node (str): The node to query.
        project_id (str): The project_id to limit the query to. Default is None.
        props (list): Properties of the node to return. 'id' and
            'submitter_id' are always requested. Default is None.
        args (str): Extra graphQL arguments. For example,
            'with_path_to:{type:"case",submitter_id:"case-01"}', etc.
            Don't enclose in parentheses.
        chunk_size (int): The number of records to request per query.
            Default is 5000.
        offset (int): The record offset to start paginating from. Default is 0.
        format (str): 'tsv' returns ``json_normalize`` of the records; any
            other value returns the records as a dictionary. Default is 'json'.

    Returns:
        ``{'data': {node: [records]}}``, or the result of ``json_normalize``
        on the record list when ``format`` is 'tsv'.

    Raises:
        SystemExit: If the query still errors with a chunk_size of 1.

    Example:
        paginate_query('demographic')
    """
    # dict.fromkeys de-duplicates while keeping a stable property order, so
    # the generated query text is deterministic.
    requested = ['id', 'submitter_id'] + list(props or [])
    properties = ' '.join(map(str, dict.fromkeys(requested)))

    # Optional filters appended after the pagination arguments.
    filters = ''
    if project_id is not None:
        filters += ', project_id:"%s"' % project_id
    if args is not None:
        filters += ', %s' % args

    records = []
    total = {'data': {node: records}}

    while True:
        # Rebuild the query on every pass so that the current offset and
        # chunk_size are the ones actually sent to the server.
        query_txt = """{%s (first: %s, offset: %s%s){%s}}""" % (
            node, chunk_size, offset, filters, properties)
        res = self.sub.query(query_txt)

        if 'data' in res:
            page = res['data'][node]  # res['data'][node] should be a list
            records += page
            offset += chunk_size
            print("\t Total records retrieved: {}".format(len(records)))
            if len(page) != chunk_size:
                # A page that is not full means there is nothing left.
                break

        elif 'error' in res:
            print(res['error'])
            if chunk_size <= 1:
                print("\t Query timing out with chunk_size of 1!")
                raise SystemExit(1)
            chunk_size = chunk_size // 2
            print("\t Halving chunk_size to: {}.".format(chunk_size))

        else:
            # Unknown response shape: retrying the identical query would
            # loop forever, so report it and stop paginating.
            print("Query Error: {}".format(res))
            break

    if format == 'tsv':
        return json_normalize(records)
    return total
455+
456+
395457 def get_uuids_in_node (self ,node ,project_id ):
396458 """
397459 This function returns a list of all the UUIDs of records
0 commit comments