Skip to content

Commit

Permalink
HARMONY-1721: Switch from using application/x-www-form-urlencoded to …
Browse files Browse the repository at this point in the history
…multipart/form-data since that's the only way harmony accepts POST requests on the OGC coverages API. Also fix a bug with the way dimensions were passed.
  • Loading branch information
chris-durbin committed Mar 13, 2024
1 parent e82881e commit 3376186
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 21 deletions.
29 changes: 14 additions & 15 deletions harmony/harmony.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,7 +655,7 @@ def _dimension_subset_params(self, request: BaseRequest) -> list:
for dim in request.dimensions:
dim_min = dim.min if dim.min is not None else '*'
dim_max = dim.max if dim.max is not None else '*'
dim_query_param = [f'{dim.name}({dim_min}:{dim_max})']
dim_query_param = f'{dim.name}({dim_min}:{dim_max})'
dimensions.append(dim_query_param)
return dimensions
else:
Expand Down Expand Up @@ -720,35 +720,34 @@ def _get_prepared_request(self, request: BaseRequest) -> requests.models.Prepare
params = self._params(request)
headers = self._headers()

method = self._http_method(request)
with self._files(request) as files:
if files:
if files or method == 'POST':
# Ideally this should just be files=files, params=params but Harmony
# cannot accept both files and query params now. (HARMONY-290)
# Inflate params to a list of tuples that can be passed as multipart
# form-data. This must be done this way due to e.g. "subset" having
# multiple values

# Note: harmony only supports multipart/form-data which is why we use
# the workaround with files rather than `data=params` even when there
# is no shapefile to send

param_items = self._params_dict_to_files(params)
file_items = [(k, v) for k, v in files.items()]
all_files = param_items + file_items

r = requests.models.Request('POST',
self._submit_url(request),
files=param_items + file_items,
files=all_files,
headers=headers)
prepped_request = session.prepare_request(r)
else:
method = self._http_method(request)
if method == 'POST':
r = requests.models.Request('POST',
self._submit_url(request),
data=params,
headers=headers)
else:
r = requests.models.Request('GET',
self._submit_url(request),
params=params,
headers=headers)
prepped_request = session.prepare_request(r)
r = requests.models.Request('GET',
self._submit_url(request),
params=params,
headers=headers)
prepped_request = session.prepare_request(r)

return prepped_request

Expand Down
77 changes: 71 additions & 6 deletions tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,74 @@ def expected_pause_url(job_id, link_type: LinkType = LinkType.https):
def expected_resume_url(job_id, link_type: LinkType = LinkType.https):
return f'https://harmony.earthdata.nasa.gov/jobs/{job_id}/resume?linktype={link_type.value}'

def parse_multipart_data(request):
    """Parse a multipart/form-data request into a dict of its form fields.

    Args:
        request: An object exposing ``body`` (the raw multipart payload as
            bytes) and ``headers`` (a mapping that contains ``Content-Type``).

    Returns:
        A dict keyed by field name. Each value is one of:
          * a ``str`` for a regular field that appears once,
          * a ``list`` of ``str`` for a regular field that is repeated
            (e.g. ``subset``),
          * ``{'filename': str, 'content': bytes}`` for a file upload.

    Raises:
        ValueError: If the Content-Type header has no ``boundary`` parameter.
    """
    content_type = request.headers['Content-Type']

    # The boundary parameter may be quoted and may be followed by further
    # parameters, so match it explicitly instead of splitting on "boundary=".
    boundary_match = re.search(r'boundary=(?:"([^"]+)"|([^;\s]+))', content_type, re.IGNORECASE)
    if boundary_match is None:
        raise ValueError(f'No multipart boundary found in Content-Type: {content_type!r}')
    boundary = boundary_match.group(1) or boundary_match.group(2)
    delimiter = ('--' + boundary).encode()

    cd_regex = re.compile(rb'Content-Disposition: form-data; name="([^"]+)"(?:; filename="([^"]+)")?', re.IGNORECASE)

    form_data = {}
    for part in request.body.split(delimiter):
        # A real part has a blank line between its headers and its body; the
        # preamble and the closing "--" marker do not and are skipped here.
        headers, separator, body = part.partition(b'\r\n\r\n')
        if not separator:
            continue

        cd_match = cd_regex.search(headers)
        if cd_match is None:
            continue

        field_name = cd_match.group(1).decode('utf-8')
        filename = cd_match.group(2)

        if filename:
            # Only the single CRLF preceding the next delimiter is framing;
            # any other leading/trailing newlines belong to the file content.
            if body.endswith(b'\r\n'):
                body = body[:-2]
            form_data[field_name] = {'filename': filename.decode('utf-8'), 'content': body}
            continue

        # Regular form field: surrounding whitespace is not significant.
        value = body.decode('utf-8').strip()
        if field_name not in form_data:
            form_data[field_name] = value
        elif isinstance(form_data[field_name], list):
            form_data[field_name].append(value)
        else:
            # Second occurrence: promote the single value to a list.
            form_data[field_name] = [form_data[field_name], value]

    return form_data

def construct_expected_params(query_string):
    """Build the expected parameter dict from a URL query string.

    A name that occurs once maps to its single string value; a name that
    occurs several times (such as `subset`) maps to a list of its values in
    the order they appear.
    """
    collected = {}
    for name, val in urllib.parse.parse_qsl(query_string):
        if name not in collected:
            # First sighting: store the bare value.
            collected[name] = val
            continue

        existing = collected[name]
        if isinstance(existing, list):
            existing.append(val)
        else:
            # Second sighting: promote the stored value to a list.
            collected[name] = [existing, val]
    return collected

def is_expected_url_and_form_encoded_body(harmony_request, http_request):
body_params = dict(urllib.parse.parse_qsl(http_request.body))
"""Returns True if the URL and form encoded body match what is expected based
on the harmony request object.
"""
form_data_params = parse_multipart_data(http_request)
async_params = ['forceAsync=true']

spatial_params = []
Expand Down Expand Up @@ -62,9 +128,9 @@ def is_expected_url_and_form_encoded_body(harmony_request, http_request):
if harmony_request.skip_preview is not None:
query_params += f'&skipPreview={str(harmony_request.skip_preview).lower()}'

expected_params = dict(urllib.parse.parse_qsl(query_params))
expected_params = construct_expected_params(query_params)

return body_params == expected_params and http_request.url == expected_submit_url(harmony_request.collection.id)
return form_data_params == expected_params and http_request.url == expected_submit_url(harmony_request.collection.id)

def expected_capabilities_url(request_params: dict):
collection_id = request_params.get('collection_id')
Expand Down Expand Up @@ -490,9 +556,8 @@ def test_request_has_query_param(param, expected):

assert len(responses.calls) == 1

body = responses.calls[0].request.body
body_params = dict(urllib.parse.parse_qsl(body))
expected_params = dict(urllib.parse.parse_qsl(expected))
body_params = parse_multipart_data(responses.calls[0].request)
expected_params = construct_expected_params(expected)

assert body_params == expected_params

Expand Down

0 comments on commit 3376186

Please sign in to comment.