Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adapt gdown to be used as a Python library #244

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions gdown/cached_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,8 @@ def cached_download(
print("File exists: {}".format(path))
return path
elif osp.exists(path) and md5:
try:
assert_md5sum(path, md5, quiet=quiet)
return path
except AssertionError as e:
print(e, file=sys.stderr)
assert_md5sum(path, md5, quiet=quiet)
return path

# download
lock_path = osp.join(cache_root, "_dl_lock")
Expand All @@ -110,7 +107,7 @@ def cached_download(
msg = "{}: {}".format(msg, path)
else:
msg = "{}...".format(msg)
print(msg, file=sys.stderr)
print(msg)

download(url, temp_path, quiet=quiet, **kwargs)
with filelock.FileLock(lock_path):
Expand Down
59 changes: 32 additions & 27 deletions gdown/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,33 +134,38 @@ def main():
url = None
id = args.url_or_id

if args.folder:
filenames = download_folder(
url=url,
id=id,
output=args.output,
quiet=args.quiet,
proxy=args.proxy,
speed=args.speed,
use_cookies=not args.no_cookies,
verify=not args.no_check_certificate,
remaining_ok=args.remaining_ok,
)
success = filenames is not None
else:
filename = download(
url=url,
output=args.output,
quiet=args.quiet,
proxy=args.proxy,
speed=args.speed,
use_cookies=not args.no_cookies,
verify=not args.no_check_certificate,
id=id,
fuzzy=args.fuzzy,
resume=args.continue_,
)
success = filename is not None

try:
if args.folder:
filenames = download_folder(
url=url,
id=id,
output=args.output,
quiet=args.quiet,
proxy=args.proxy,
speed=args.speed,
use_cookies=not args.no_cookies,
verify=not args.no_check_certificate,
remaining_ok=args.remaining_ok,
)
success = filenames is not None
else:
filename = download(
url=url,
output=args.output,
quiet=args.quiet,
proxy=args.proxy,
speed=args.speed,
use_cookies=not args.no_cookies,
verify=not args.no_check_certificate,
id=id,
fuzzy=args.fuzzy,
resume=args.continue_,
)
success = filename is not None
except Exception as e:
print(e, file=sys.stderr)
success = False

if not success:
sys.exit(1)
Expand Down
66 changes: 28 additions & 38 deletions gdown/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@
import os.path as osp
import re
import shutil
import sys
import tempfile
import textwrap
import time

import requests
import six
import tqdm
from requests import HTTPError, RequestException
from requests.exceptions import ProxyError

from .parse_url import parse_url

Expand Down Expand Up @@ -89,6 +89,7 @@ def download(
id=None,
fuzzy=False,
resume=False,
progress=None
):
"""Download file from URL.
Expand Down Expand Up @@ -117,6 +118,8 @@ def download(
resume: bool
Resume the download from existing tmp file if possible.
Default is False.
progress: tqdm.tqdm
callback implementing tqdm progress interface
Returns
-------
Expand All @@ -127,6 +130,9 @@ def download(
raise ValueError("Either url or id has to be specified")
if id is not None:
url = "https://drive.google.com/uc?id={id}".format(id=id)
if progress is None:
import tqdm
progress = lambda total: tqdm.tqdm(total=total, unit="B", unit_scale=True)

url_origin = url

Expand All @@ -136,7 +142,7 @@ def download(

if proxy is not None:
sess.proxies = {"http": proxy, "https": proxy}
print("Using proxy:", proxy, file=sys.stderr)
print("Using proxy:", proxy)

gdrive_file_id, is_gdrive_download_link = parse_url(url, warning=not fuzzy)

Expand All @@ -153,10 +159,8 @@ def download(
while True:
try:
res = sess.get(url, headers=headers, stream=True, verify=verify)
except requests.exceptions.ProxyError as e:
print("An error has occurred using proxy:", proxy, file=sys.stderr)
print(e, file=sys.stderr)
return
except ProxyError as e:
raise ProxyError(f"An error has occurred using proxy: ${proxy}", code=407) from e

if use_cookies:
if not osp.exists(osp.dirname(cookies_file)):
Expand All @@ -180,16 +184,14 @@ def download(
try:
url = get_url_from_gdrive_confirmation(res.text)
except RuntimeError as e:
print("Access denied with the following error:")
error = "\n".join(textwrap.wrap(str(e)))
error = indent(error, "\t")
print("\n", error, "\n", file=sys.stderr)
print(
"You may still be able to access the file from the browser:",
file=sys.stderr,
)
print("\n\t", url_origin, "\n", file=sys.stderr)
return
raise HTTPError(
"Access denied with the following error:\n" +
indent("\n".join(textwrap.wrap(str(e))), "\t") +
"\n" +
"You may still be able to access the file from the browser:" +
"\n\t" + url_origin + "\n",
code=403
) from e

if gdrive_file_id and is_gdrive_download_link:
content_disposition = six.moves.urllib_parse.unquote(
Expand Down Expand Up @@ -217,19 +219,11 @@ def download(
existing_tmp_files.append(osp.join(osp.dirname(output), file))
if resume and existing_tmp_files:
if len(existing_tmp_files) != 1:
print(
"There are multiple temporary files to resume:",
file=sys.stderr,
)
print("\n")
for file in existing_tmp_files:
print("\t", file, file=sys.stderr)
print("\n")
print(
"Please remove them except one to resume downloading.",
file=sys.stderr,
raise RequestException(
"There are multiple temporary files to resume:\n" +
"\t".join(existing_tmp_files) + "\n" +
"Please remove them except one to resume downloading."
)
return
tmp_file = existing_tmp_files[0]
else:
resume = False
Expand All @@ -250,22 +244,21 @@ def download(
res = sess.get(url, headers=headers, stream=True, verify=verify)

if not quiet:
print("Downloading...", file=sys.stderr)
print("Downloading...")
if resume:
print("Resume:", tmp_file, file=sys.stderr)
print("From:", url_origin, file=sys.stderr)
print("Resume:", tmp_file)
print("From:", url_origin)
print(
"To:",
osp.abspath(output) if output_is_path else output,
file=sys.stderr,
osp.abspath(output) if output_is_path else output
)

try:
total = res.headers.get("Content-Length")
if total is not None:
total = int(total)
if not quiet:
pbar = tqdm.tqdm(total=total, unit="B", unit_scale=True)
pbar = progress(total=total)
t_start = time.time()
for chunk in res.iter_content(chunk_size=CHUNK_SIZE):
f.write(chunk)
Expand All @@ -281,9 +274,6 @@ def download(
if tmp_file:
f.close()
shutil.move(tmp_file, output)
except IOError as e:
print(e, file=sys.stderr)
return
finally:
sess.close()

Expand Down
22 changes: 10 additions & 12 deletions gdown/download_folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import warnings

import bs4
from requests import RequestException, HTTPError

from .download import _get_session
from .download import download
Expand Down Expand Up @@ -252,7 +253,7 @@ def download_folder(
sess = _get_session(use_cookies=use_cookies)

if not quiet:
print("Retrieving folder list", file=sys.stderr)
print("Retrieving folder list")
try:
return_code, gdrive_file = _download_and_parse_google_drive_link(
sess,
Expand All @@ -262,17 +263,16 @@ def download_folder(
verify=verify,
)
except RuntimeError as e:
print("Failed to retrieve folder contents:", file=sys.stderr)
error = "\n".join(textwrap.wrap(str(e)))
error = indent(error, "\t")
print("\n", error, "\n", file=sys.stderr)
return
raise RequestException(
"Failed to retrieve folder contents:" +
indent("\n".join(textwrap.wrap(str(e))), "\t")
) from e

if not return_code:
return return_code
if not quiet:
print("Retrieving folder list completed", file=sys.stderr)
print("Building directory structure", file=sys.stderr)
print("Retrieving folder list completed")
print("Building directory structure")
if output is None:
output = os.getcwd() + osp.sep
if output.endswith(osp.sep):
Expand Down Expand Up @@ -303,10 +303,8 @@ def download_folder(
)

if filename is None:
if not quiet:
print("Download ended unsuccessfully", file=sys.stderr)
return
raise HTTPError("Download ended unsuccessfully")
filenames.append(filename)
if not quiet:
print("Download completed", file=sys.stderr)
print("Download completed")
return filenames