Skip to content

Commit

Permalink
Google Docs: Submit form parameters to virus warning bypass (#199)
Browse files Browse the repository at this point in the history
* Google Docs: Submit form parameters to virus warning bypass.

This seems to fix an issue where the virus-warning form's action URL pointed only at /download,
so fetching it without the form's parameters resulted in a 400 error.

* Google Docs: Update tests for form parameter submit logic.
  • Loading branch information
jbeshir authored Apr 22, 2024
1 parent 2a9eced commit 03f1d9b
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 6 deletions.
11 changes: 9 additions & 2 deletions align_data/sources/articles/google_cloud.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import time
+import urllib
from collections import UserDict
from pathlib import Path
from typing import Dict, Any, Iterator, Union, List, Set
Expand Down Expand Up @@ -223,8 +224,14 @@ def extract_gdrive_contents(link: str) -> Dict[str, Any]:
form_action_url = form_tag.get('action')
if not isinstance(form_action_url, str):
return {**result, 'error': 'Virus scan warning - no form action url'}

-res = fetch(form_action_url)

+query_components = {}
+for tag in form_tag.find_all("input", type="hidden"):
+    query_components[tag['name']] = tag['value']

+form_full_url = form_action_url + "?" + urllib.parse.urlencode(query_components)

+res = fetch(form_full_url)

content_type = get_content_type(res)
if content_type & {"text/xml"}:
Expand Down
8 changes: 4 additions & 4 deletions tests/align_data/articles/test_google_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,11 +263,11 @@ def test_extract_gdrive_contents_xml_with_confirm():

def fetcher(link, *args, **kwargs):
# The first request should get the google drive warning page
-if link != "fetch/xml/contents":
+if link != "fetch/xml/contents?id=foo":
html = """
<body>
<title>Google Drive - Virus scan warning</title>
-<form action="fetch/xml/contents"></form>
+<form action="fetch/xml/contents"><input type="hidden" name="id" value="foo" /></form>
</body>
"""
return Mock(
Expand Down Expand Up @@ -302,11 +302,11 @@ def test_extract_gdrive_contents_warning_with_unknown():

def fetcher(link, *args, **kwargs):
# The first request should get the google drive warning page
-if link != "fetch/xml/contents":
+if link != "fetch/xml/contents?id=foo":
html = """
<body>
<title>Google Drive - Virus scan warning</title>
-<form action="fetch/xml/contents"></form>
+<form action="fetch/xml/contents"><input type="hidden" name="id" value="foo" /></form>
</body>
"""
return Mock(
Expand Down

0 comments on commit 03f1d9b

Please sign in to comment.