Skip to content

Commit

Permalink
Merge pull request #6 from d3b-center/feature/reorganize_dewrangle
Browse files Browse the repository at this point in the history
Feature/reorganize dewrangle
  • Loading branch information
sickler-alex authored Apr 2, 2024
2 parents 6b10c41 + da7b851 commit d554e7c
Show file tree
Hide file tree
Showing 5 changed files with 802 additions and 453 deletions.
60 changes: 42 additions & 18 deletions d3b_dff_cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,50 +5,47 @@
from .modules.validation.check_readgroup import main as check_readgroup
from .modules.validation.check_url import main as check_url
from .modules.dewrangle.volume import main as hash_volume
from .modules.dewrangle.list_jobs import main as list_jobs
from .modules.dewrangle.download_job import main as download_dewrangle_job


def add_hash_arguments(my_parser):
def add_dewrangle_arguments(my_parser):
"""
Create parser for volume hash subcommand.
Add standard arguments for Dewrangle subcommands.
Input:
- my_parser: argparse parser being added to
Output:
- original parser with added arguments
"""
hash_parser = my_parser.add_parser(
"hash", help="Hash volume in Dewrangle"
)
hash_parser.add_argument(
my_parser.add_argument(
"-prefix",
help="Optional, Path prefix. Default: None",
default=None,
required=False,
)
hash_parser.add_argument(
my_parser.add_argument(
"-region",
help="Optional, Bucket AWS region code. Default: us-east-1",
default="us-east-1",
required=False,
)
hash_parser.add_argument(
my_parser.add_argument(
"-billing",
help="Optional, billing group name. When not provided, use default billing group for organization",
default=None,
required=False,
)
hash_parser.add_argument(
my_parser.add_argument(
"-credential",
help="Dewrangle AWS credential name. Default, try to find available credential.",
required=False,
)
hash_parser.add_argument(
my_parser.add_argument(
"-study", help="Study name, global id, or study id", required=True
)
hash_parser.add_argument("-bucket", help="Bucket name", required=True)
hash_parser.set_defaults(func=hash_volume)

return hash_parser
my_parser.add_argument("-bucket", help="Bucket name", required=True)

return my_parser


def main():
Expand Down Expand Up @@ -104,14 +101,42 @@ def main():
parser_url.add_argument("urls", nargs="+", help="One or more URLs to validate")
parser_url.set_defaults(func=check_url)

# Volume Command
# Dewrangle commands
# hash: load a bucket to Dewrangle and hash it
# list_jobs: list jobs run on a bucket
# download: download the results of a job
dewrangle_parser = subparsers.add_parser("dewrangle", help="Dewrangle commands")
dewrangle_subparsers = dewrangle_parser.add_subparsers(
title="Dewrangle Subcommands", dest="dewrangle_command"
)

# volume hash subcommand
hash_parser = add_hash_arguments(dewrangle_subparsers)
# hash subcommand
hash_parser = dewrangle_subparsers.add_parser("hash", help="Hash volume in Dewrangle")
hash_parser = add_dewrangle_arguments(hash_parser)
hash_parser.set_defaults(func=hash_volume)

# list_jobs subcommand
list_parser = dewrangle_subparsers.add_parser(
"list_jobs", help="List volume jobs in Dewrangle"
)
list_parser = add_dewrangle_arguments(list_parser)
list_parser.set_defaults(func=list_jobs)

# download subcommand
dl_parser = dewrangle_subparsers.add_parser(
"download", help="Download job results from Dewrangle"
)
dl_parser.add_argument(
"-jobid",
help="Dewrangle jobid",
required=True,
)
dl_parser.add_argument(
"-outfile",
help="Output file name",
required=True,
)
dl_parser.set_defaults(func=download_dewrangle_job)

args = parser.parse_args()

Expand Down Expand Up @@ -140,6 +165,5 @@ def main():
sys.exit(2)



if __name__ == "__main__":
main()
24 changes: 24 additions & 0 deletions d3b_dff_cli/modules/dewrangle/download_job.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""Download job results from Dewrangle."""

from . import helper_functions as hf

def download_job(jobid, token=None):
    """
    Fetch the results of a Dewrangle job.
    Input:
        - jobid: Dewrangle job id
        - token: optional API key, forwarded unchanged to the helper
          functions (default: None)
    Output:
        - whatever hf.download_job_result returns for the job; the caller
          in this module unpacks it as a (status, results) pair
    """

    # The same token is used both to build the client and for the download call.
    gql_client = hf.create_gql_client(api_key=token)
    job_result = hf.download_job_result(jobid, client=gql_client, api_key=token)

    return job_result


def main(args):
    """
    Entry point for the download subcommand.
    Input:
        - args: parsed arguments exposing `jobid` and `outfile`
    Output:
        - None. When the job status is "Complete" the results are written
          to args.outfile with to_csv; otherwise a notice is printed.
    """

    job_status, results = download_job(args.jobid)

    # Only a finished job has results worth writing out.
    if job_status == "Complete":
        results.to_csv(args.outfile)
        return

    print("Job incomplete, please check again later.")
Loading

0 comments on commit d554e7c

Please sign in to comment.