From cedea4fed3add985a9937914b180b5b607a90235 Mon Sep 17 00:00:00 2001 From: Alex Sickler Date: Wed, 24 Apr 2024 09:34:54 -0400 Subject: [PATCH 1/4] :fire: remove unneeded pandas import --- d3b_dff_cli/modules/validation/check_manifest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/d3b_dff_cli/modules/validation/check_manifest.py b/d3b_dff_cli/modules/validation/check_manifest.py index 79d0c74..08e8c74 100644 --- a/d3b_dff_cli/modules/validation/check_manifest.py +++ b/d3b_dff_cli/modules/validation/check_manifest.py @@ -1,5 +1,4 @@ import json -import pandas as pd import argparse import csv From 9e39c95997cb7b344a0b4dd393b84e2cd763c2f9 Mon Sep 17 00:00:00 2001 From: Alex Sickler Date: Wed, 24 Apr 2024 12:49:44 -0400 Subject: [PATCH 2/4] :fire: remove pandas from helper functions --- .../modules/dewrangle/helper_functions.py | 35 ++++++++----------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/d3b_dff_cli/modules/dewrangle/helper_functions.py b/d3b_dff_cli/modules/dewrangle/helper_functions.py index 230a44f..d8852f2 100644 --- a/d3b_dff_cli/modules/dewrangle/helper_functions.py +++ b/d3b_dff_cli/modules/dewrangle/helper_functions.py @@ -1,11 +1,8 @@ """Dewrangle helper functions""" import os -import sys -import traceback import configparser import requests -import pandas as pd from gql import gql, Client from gql.transport.aiohttp import AIOHTTPTransport from datetime import datetime @@ -592,22 +589,6 @@ def get_job_info(jobid, client=None): return result -def request_to_df(url, **kwargs): - """Call api and return response as a pandas dataframe.""" - my_data = [] - with requests.get(url, **kwargs) as response: - # check if the request was successful - if response.status_code == 200: - for line in response.iter_lines(): - my_data.append(line.decode().split(",")) - else: - print(f"Failed to fetch the CSV. 
Status code: {response.status_code}") - - my_cols = my_data.pop(0) - df = pd.DataFrame(my_data, columns=my_cols) - return df - - def download_job_result(jobid, client=None, api_key=None): """Check if a job is complete, download results if it is. If the job is a list and hash job, only download the hash result.""" @@ -616,7 +597,7 @@ def download_job_result(jobid, client=None, api_key=None): job_status = None - job_result = None + job_result = [] job_info = get_job_info(jobid, client) @@ -643,7 +624,19 @@ def download_job_result(jobid, client=None, api_key=None): if child_job["operation"] == "VOLUME_HASH": jobid = child_job["id"] url = endpoint + jobid + "/result" - job_result = request_to_df(url, headers=req_header, stream=True) + + # Query Dewrangle REST API and get the job results + with requests.get(url, headers=req_header, stream=True) as response: + # check if the request was successful + if response.status_code == 200: + for line in response.iter_lines(): + job_result.append(line.decode().split(",")) + + else: + print(f"Failed to fetch the CSV. 
Status code: {response.status_code}") + + print(job_result) + else: print("Job type {} does not have results to download".format(job_type)) From 8256d325807e5f50112001b0a270fdcff664fc24 Mon Sep 17 00:00:00 2001 From: Alex Sickler Date: Wed, 24 Apr 2024 13:29:13 -0400 Subject: [PATCH 3/4] :sparkle: change how output file is written --- d3b_dff_cli/modules/dewrangle/download_job.py | 6 ++++-- d3b_dff_cli/modules/dewrangle/helper_functions.py | 2 -- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/d3b_dff_cli/modules/dewrangle/download_job.py b/d3b_dff_cli/modules/dewrangle/download_job.py index 68ccf5c..e1a866f 100644 --- a/d3b_dff_cli/modules/dewrangle/download_job.py +++ b/d3b_dff_cli/modules/dewrangle/download_job.py @@ -17,8 +17,10 @@ def download_job(jobid, token=None): def main(args): """Main function.""" - status, job_df = download_job(args.jobid) + status, job_res = download_job(args.jobid) if status == "Complete": - job_df.to_csv(args.outfile) + with open(args.outfile, "w") as f: + for line in job_res: + f.write("%s\n" % ",".join(line)) else: print("Job incomplete, please check again later.") diff --git a/d3b_dff_cli/modules/dewrangle/helper_functions.py b/d3b_dff_cli/modules/dewrangle/helper_functions.py index d8852f2..900946c 100644 --- a/d3b_dff_cli/modules/dewrangle/helper_functions.py +++ b/d3b_dff_cli/modules/dewrangle/helper_functions.py @@ -635,8 +635,6 @@ def download_job_result(jobid, client=None, api_key=None): else: print(f"Failed to fetch the CSV. 
Status code: {response.status_code}") - print(job_result) - else: print("Job type {} does not have results to download".format(job_type)) From aafa31b48e71aa935634bc4416b11221040a231a Mon Sep 17 00:00:00 2001 From: Alex Sickler Date: Wed, 24 Apr 2024 13:29:49 -0400 Subject: [PATCH 4/4] :fire: remove pandas from requirements --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e82c1bb..9a35e83 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,7 +13,6 @@ graphql-core==3.2.3 idna==3.6 multidict==6.0.5 numpy==1.24.4 -pandas==2.0.3 pysam==0.22.0 python-dateutil==2.8.2 pytz==2023.3.post1