Skip to content

Commit

Permalink
Update dataverse_repository_curation_assistant_functions.py
Browse files Browse the repository at this point in the history
Closes #29

The problem wasn't with deaccessioned datasets, but with any search URL that contains HTML encoding (more precisely, URL percent-encoding), like %20, instead of the human-readable value that it represents, like a ' ' (space) character.

For some reason, the expected results aren't returned when the query parameter contains HTML encoding, but they are returned when the human-readable character, like ' ' (space), is used instead.

So when someone enters "https://dataverse.harvard.edu/dataverse/harvard?q=%22Arif,%20Mohd%22", the get_params function, before it gets the parameters of that search URL to pass to the Requests get function, needs to first convert any HTML encoding in the query parameter to human-readable characters.
  • Loading branch information
jggautier committed May 24, 2022
1 parent 400ec49 commit 40cf127
Showing 1 changed file with 6 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,9 @@ def get_params(apiSearchURL):

# Add query to params dict
if paramValue.startswith('q='):
paramValue = convert_utf8bytes_to_characters(paramValue)
paramValue = convert_common_html_encoding(paramValue)
paramValue = paramValue.replace('+', ' ')
params['params']['q'] = paramValue.replace('q=', '')

# Add non-fq queries to params dict
Expand All @@ -407,14 +410,12 @@ def get_params(apiSearchURL):
if paramValue.split('=')[1] != '':
params['params'][key] = paramValue.split('=')[1]

# Add values of each type param to a
# Add values of each type param to typeParamList
if paramValue.startswith('type'):
# Convert types value in string
valueString = paramValue.split('=')[1]
typeParamList.append(valueString)

# Add fq queries to fq dict ignoring any dvObjectType params
# if paramValue.startswith('=') and 'dvObjectType' not in paramValue:
# Add fq queries to fq dict
if paramValue.startswith('='):
key = paramValue.replace('=', '').split(':')[0]
value = paramValue.split(':')[1]
Expand Down Expand Up @@ -466,14 +467,13 @@ def get_value_row_from_search_api_object(item, installationUrl):
'file_name': item['name'],
'dataset_pid': item['dataset_persistent_id']
}

return newRow


# Uses Search API to return dataframe containing info about datasets in a Dataverse installation
# Write progress and results to the tkinter window
def get_object_dataframe_from_search_api(
url, params, objectType, rootWindow=None, progressText=None, progressLabel=None, apiKey=''):
url, params, objectType, rootWindow=None, progressText=None, progressLabel=None, apiKey=None):

installationUrl = get_installation_url(url)

Expand Down

0 comments on commit 40cf127

Please sign in to comment.