d3b-center · sickler-alex · Apr 25, 2024 · Apr 24, 2024 · Apr 24, 2024 · Apr 24, 2024
diff --git a/d3b_dff_cli/modules/dewrangle/download_job.py b/d3b_dff_cli/modules/dewrangle/download_job.py
@@ -17,8 +17,10 @@ def download_job(jobid, token=None):
 def main(args):
     """Main function."""
 
-    status, job_df = download_job(args.jobid)
+    status, job_res = download_job(args.jobid)
     if status == "Complete":
-        job_df.to_csv(args.outfile)
+        with open(args.outfile, "w") as f:
+            for line in job_res:
+                f.write("%s\n" % ",".join(line))
     else:
         print("Job incomplete, please check again later.")
diff --git a/d3b_dff_cli/modules/dewrangle/helper_functions.py b/d3b_dff_cli/modules/dewrangle/helper_functions.py
@@ -1,11 +1,8 @@
 """Dewrangle helper functions"""
 
 import os
-import sys
-import traceback
 import configparser
 import requests
-import pandas as pd
 from gql import gql, Client
 from gql.transport.aiohttp import AIOHTTPTransport
 from datetime import datetime
@@ -592,22 +589,6 @@ def get_job_info(jobid, client=None):
     return result
 
 
-def request_to_df(url, **kwargs):
-    """Call api and return response as a pandas dataframe."""
-    my_data = []
-    with requests.get(url, **kwargs) as response:
-        # check if the request was successful
-        if response.status_code == 200:
-            for line in response.iter_lines():
-                my_data.append(line.decode().split(","))
-        else:
-            print(f"Failed to fetch the CSV. Status code: {response.status_code}")
-
-    my_cols = my_data.pop(0)
-    df = pd.DataFrame(my_data, columns=my_cols)
-    return df
-
-
 def download_job_result(jobid, client=None, api_key=None):
     """Check if a job is complete, download results if it is.
     If the job is a list and hash job, only download the hash result."""
@@ -616,7 +597,7 @@ def download_job_result(jobid, client=None, api_key=None):
 
     job_status = None
 
-    job_result = None
+    job_result = []
 
     job_info = get_job_info(jobid, client)
 
@@ -643,7 +624,17 @@ def download_job_result(jobid, client=None, api_key=None):
                     if child_job["operation"] == "VOLUME_HASH":
                         jobid = child_job["id"]
             url = endpoint + jobid + "/result"
-            job_result = request_to_df(url, headers=req_header, stream=True)
+
+            # Query Dewrangle REST API and get the job results
+            with requests.get(url, headers=req_header, stream=True) as response:
+                # check if the request was successful
+                if response.status_code == 200:
+                    for line in response.iter_lines():
+                        job_result.append(line.decode().split(","))
+
+                else:
+                    print(f"Failed to fetch the CSV. Status code: {response.status_code}")
+
         else:
             print("Job type {} does not have results to download".format(job_type))
 

diff --git a/d3b_dff_cli/modules/validation/check_manifest.py b/d3b_dff_cli/modules/validation/check_manifest.py
@@ -1,5 +1,4 @@
 import json
-import pandas as pd
 import argparse
 import csv
 

diff --git a/requirements.txt b/requirements.txt
@@ -13,7 +13,6 @@ graphql-core==3.2.3
 idna==3.6
 multidict==6.0.5
 numpy==1.24.4
-pandas==2.0.3
 pysam==0.22.0
 python-dateutil==2.8.2
 pytz==2023.3.post1