Commit

Add argument to not fetch results from S3. Pull Request #84 by vogxn.
Squashed commit of the following:

commit ec82e00
Author: Prasanna Santhanam <[email protected]>
Date:   Wed Feb 25 10:21:00 2015 +0530

    fix: usr: argument fetch documented better

    also added documentation for the get_results call and behaviour with
    various arguments.

commit 1287b14
Author: Prasanna Santhanam <[email protected]>
Date:   Sun Feb 22 08:59:14 2015 +0530

    optionally fetch the results when they are greater than 50MB

    In certain cases the result_location is used to further create external
    tables, but since the SDK returns raw results this is not possible. For
    these cases I want to optionally fetch only the result_location and not
    read the results from the S3 bucket to return to the client.
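
To illustrate the behaviour described above: a caller that only needs the S3 location (for example, to later build an external table over it) can skip the download entirely by passing fetch=False. The sketch below is not part of the commit; the API token, query, and table name are placeholders, and it assumes the Qubole, HiveCommand, and get_results interfaces shown in this change.

import io

from qds_sdk.qubole import Qubole
from qds_sdk.commands import HiveCommand

Qubole.configure(api_token='YOUR_API_TOKEN')  # placeholder token

# Run a query whose result set may be too large to be returned inline.
cmd = HiveCommand.run(query="SELECT * FROM default.page_views")  # hypothetical table

# With fetch=False, get_results writes the comma-separated S3 result
# location(s) to `fp` instead of downloading the data from S3.
# (Small results that come back inline are still written as data.)
buf = io.StringIO()
cmd.get_results(fp=buf, fetch=False)
print("Result location(s): %s" % buf.getvalue())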
Prasanna Santhanam authored and Rohit Agarwal committed Feb 25, 2015
1 parent bd0a409 commit 8261eda
Showing 1 changed file with 24 additions and 11 deletions.
35 changes: 24 additions & 11 deletions qds_sdk/commands.py
@@ -154,12 +154,21 @@ def get_jobs_id(cls, id):
         return r.text


-    def get_results(self, fp=sys.stdout, inline=True, delim=None):
+    def get_results(self, fp=sys.stdout, inline=True, delim=None, fetch=True):
         """
         Fetches the result for the command represented by this object
+        get_results will retrieve results of the command and write to stdout by default.
+        Optionally one can write to a filestream specified in `fp`. The `inline` argument
+        decides whether the result can be returned as a CRLF separated string. In cases where
+        the results are greater than 20MB, get_results will attempt to read from s3 and write
+        to fp. The retrieval of results from s3 can be turned off by the `fetch` argument
+        Args:
+            `fp`: a file object to write the results to directly
+            `inline`: whether or not results are returned inline as CRLF separated string
+            `fetch`: True to fetch the result even if it is greater than 20MB, False to
+                     only get the result location on s3
         """
         result_path = self.meta_data['results_resource']

@@ -179,16 +188,20 @@ def get_results(self, fp=sys.stdout, inline=True, delim=None):
                     # Can this happen? Don't know what's the right thing to do in this case.
                     pass
         else:
-            acc = Account.find()
-            boto_conn = boto.connect_s3(aws_access_key_id=acc.storage_access_key,
-                                        aws_secret_access_key=acc.storage_secret_key)
-
-            log.info("Starting download from result locations: [%s]" % ",".join(r['result_location']))
-            #fetch latest value of num_result_dir
-            num_result_dir = Command.find(self.id).num_result_dir
-            for s3_path in r['result_location']:
-                # In Python 3, in this case, `fp` should always be binary mode.
-                _download_to_local(boto_conn, s3_path, fp, num_result_dir, delim=delim)
+            if fetch:
+                acc = Account.find()
+                boto_conn = boto.connect_s3(aws_access_key_id=acc.storage_access_key,
+                                            aws_secret_access_key=acc.storage_secret_key)
+
+                log.info("Starting download from result locations: [%s]" % ",".join(r['result_location']))
+                #fetch latest value of num_result_dir
+                num_result_dir = Command.find(self.id).num_result_dir
+                for s3_path in r['result_location']:
+                    # In Python 3, in this case, `fp` should always be binary mode.
+                    _download_to_local(boto_conn, s3_path, fp, num_result_dir, delim=delim)
+            else:
+                fp.write(",".join(r['result_location']))


 class HiveCommand(Command):
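For the default fetch=True path, the comment carried over in the diff notes that on Python 3 `fp` should be a binary-mode file object when results are downloaded from S3. A minimal sketch of that usage, with a hypothetical command id and output path:

from qds_sdk.qubole import Qubole
from qds_sdk.commands import Command

Qubole.configure(api_token='YOUR_API_TOKEN')  # placeholder token

cmd = Command.find(12345)  # hypothetical command id

# Open the output file in binary mode: large results are pulled from S3
# and written to `fp` as bytes; delim optionally re-delimits the columns.
with open("/tmp/results.tsv", "wb") as fp:
    cmd.get_results(fp=fp, delim="\t", fetch=True)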
