From 38c3695ee30e5a8523a233dddbb2751e51bd42b3 Mon Sep 17 00:00:00 2001 From: Mahesh Vashishtha Date: Tue, 4 Feb 2020 18:10:18 -0800 Subject: [PATCH] Make some Python API methods compatible with coming Mixer API changes. We are going to change the places-in, population, and observation mixer API methods so that they return maps from dcid to lists of dcids, population dcids, and observation values respectively. Update the corresponding Python API methods so that they work with both the new and old Mixer API versions. --- README.md | 4 +- datacommons/test/places_test.py | 93 ++++++++++++++++++ datacommons/test/populations_test.py | 136 +++++++++++++++++++++++---- datacommons/utils.py | 32 +++++-- 4 files changed, 236 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index dc1d902e..ad5c0849 100644 --- a/README.md +++ b/README.md @@ -40,8 +40,8 @@ The Python Client API currently supports `python>=3.6`. We use bazel then run the following: ``` -$ bazel build //... -$ bazel test //... +$ bazel build //... --incompatible_disable_deprecated_attr_params=false +$ bazel test //... --incompatible_disable_deprecated_attr_params=false ``` ## Support diff --git a/datacommons/test/places_test.py b/datacommons/test/places_test.py index e0f9670d..60a1f622 100644 --- a/datacommons/test/places_test.py +++ b/datacommons/test/places_test.py @@ -90,6 +90,52 @@ def read(self): # Otherwise, return an empty response and a 404. return urllib.error.HTTPError +# Returns the PlacesIn response as a dict instead of a list of Objects. +def request_mock_with_new_api(*args, **kwargs): + """ A mock urlopen requests sent in the requests package. """ + # Create the mock response object. + class MockResponse: + def __init__(self, json_data): + self.json_data = json_data + + def read(self): + return self.json_data + + req = args[0] + data = json.loads(req.data) + + # If the API key does not match, then return 403 Forbidden + api_key = req.get_header('X-api-key') + if api_key != 'TEST-API-KEY': + return urllib.error.HTTPError(None, 403, None, None, None) + + # Mock responses for urlopen requests to get_places_in. + if req.full_url == utils._API_ROOT + utils._API_ENDPOINTS['get_places_in']: + if (data['dcids'] == ['geoId/06085', 'geoId/24031'] + and data['place_type'] == 'City'): + # Response returned when querying for multiple valid dcids. + res_json = json.dumps( + {'geoId/06085': ['geoId/0649670'], + 'geoId/24031': ['geoId/2467675', 'geoId/2476650']}) + return MockResponse(json.dumps({'payload': res_json})) + if (data['dcids'] == ['geoId/06085', 'dc/MadDcid'] + and data['place_type'] == 'City'): + # Response returned when querying for a dcid that does not exist. + res_json = json.dumps({'geoId/06085': ['geoId/0649670']}) + return MockResponse(json.dumps({'payload': res_json})) + if data['dcids'] == ['dc/MadDcid', 'dc/MadderDcid']\ + and data['place_type'] == 'City': + # Response returned when both given dcids do not exist. + res_json = json.dumps({}) + return MockResponse(json.dumps({'payload': res_json})) + if data['dcids'] == [] and data['place_type'] == 'City': + res_json = json.dumps({}) + # Response returned when no dcids are given. + return MockResponse(json.dumps({'payload': res_json})) + + # Otherwise, return an empty response and a 404. + return urllib.error.HTTPError + class TestGetPlacesIn(unittest.TestCase): """ Unit stests for get_places_in. """ @@ -106,6 +152,19 @@ def test_multiple_dcids(self, urlopen): 'geoId/24031': ['geoId/2467675', 'geoId/2476650'] }) + @mock.patch('urllib.request.urlopen', side_effect=request_mock_with_new_api) + def test_multiple_dcids_new_mixer_api(self, urlopen): + """ Calling get_places_in with proper dcids returns valid results. """ + # Set the API key + dc.set_api_key('TEST-API-KEY') + + # Call get_places_in + places = dc.get_places_in(['geoId/06085', 'geoId/24031'], 'City') + self.assertDictEqual(places, { + 'geoId/06085': ['geoId/0649670'], + 'geoId/24031': ['geoId/2467675', 'geoId/2476650'] + }) + @mock.patch('urllib.request.urlopen', side_effect=request_mock) def test_bad_dcids(self, urlopen): """ Calling get_places_in with dcids that do not exist returns empty @@ -128,6 +187,28 @@ def test_bad_dcids(self, urlopen): 'dc/MadderDcid': [] }) + @mock.patch('urllib.request.urlopen', side_effect=request_mock_with_new_api) + def test_bad_dcids_new_mixer_api(self, urlopen): + """ Calling get_places_in with dcids that do not exist returns empty + results. + """ + # Set the API key + dc.set_api_key('TEST-API-KEY') + + # Call get_places_in with one dcid that does not exist + bad_dcids_1 = dc.get_places_in(['geoId/06085', 'dc/MadDcid'], 'City') + self.assertDictEqual(bad_dcids_1, { + 'geoId/06085': ['geoId/0649670'], + 'dc/MadDcid': [] + }) + + # Call get_places_in when both dcids do not exist + bad_dcids_2 = dc.get_places_in(['dc/MadDcid', 'dc/MadderDcid'], 'City') + self.assertDictEqual(bad_dcids_2, { + 'dc/MadDcid': [], + 'dc/MadderDcid': [] + }) + @mock.patch('urllib.request.urlopen', side_effect=request_mock) def test_no_dcids(self, urlopen): """ Calling get_places_in with no dcids returns empty results. """ @@ -141,6 +222,18 @@ def test_no_dcids(self, urlopen): 'dc/MadderDcid': [] }) + @mock.patch('urllib.request.urlopen', side_effect=request_mock_with_new_api) + def test_no_dcids_new_mixer_api(self, urlopen): + """ Calling get_places_in with no dcids returns empty results. """ + # Set the API key + dc.set_api_key('TEST-API-KEY') + + # Call get_places_in with no dcids. + bad_dcids = dc.get_places_in(['dc/MadDcid', 'dc/MadderDcid'], 'City') + self.assertDictEqual(bad_dcids, { + 'dc/MadDcid': [], + 'dc/MadderDcid': [] + }) if __name__ == '__main__': unittest.main() diff --git a/datacommons/test/populations_test.py b/datacommons/test/populations_test.py index cd1e2f03..ebef4ff5 100644 --- a/datacommons/test/populations_test.py +++ b/datacommons/test/populations_test.py @@ -31,8 +31,10 @@ import urllib import zlib - -def request_mock(*args, **kwargs): +# new_mixer_api specifies whether to use the Mixer API +# that gives dictionaries instead of lists for places-in, +# population, and observation functions. +def request_mock_base(*args, **kwargs): """ A mock urlopen request sent in the requests package. """ # Create the mock response object. class MockResponse: @@ -68,20 +70,21 @@ def read(self): and data['pvs'] == constrained_props: if data['dcids'] == ['geoId/06085', 'geoId/4805000']: # Response returned when querying for multiple valid dcids. - res_json = json.dumps([ - { - 'dcid': 'geoId/06085', - 'population': 'dc/p/crgfn8blpvl35' - }, - { - 'dcid': 'geoId/4805000', - 'population': 'dc/p/f3q9whmjwbf36' - } - ]) + res_json =json.dumps({ + 'geoId/06085': 'dc/p/crgfn8blpvl35', + 'geoId/4805000': 'dc/p/f3q9whmjwbf36'}) if kwargs['new_mixer_api'] else json.dumps([ + { + 'dcid': 'geoId/06085', + 'population': 'dc/p/crgfn8blpvl35' + }, + { + 'dcid': 'geoId/4805000', + 'population': 'dc/p/f3q9whmjwbf36' + }]) return MockResponse(json.dumps({'payload': res_json})) if data['dcids'] == ['geoId/06085', 'dc/MadDcid']: # Response returned when querying for a dcid that does not exist. - res_json = json.dumps([ + res_json = json.dumps({'geoId/06085': 'dc/p/crgfn8blpvl35'}) if kwargs['new_mixer_api'] else json.dumps([ { 'dcid': 'geoId/06085', 'population': 'dc/p/crgfn8blpvl35' @@ -91,7 +94,7 @@ def read(self): if data['dcids'] == ['dc/MadDcid', 'dc/MadderDcid'] or data['dcids'] == []: # Response returned when both given dcids do not exist or no dcids are # provided to the method. - res_json = json.dumps([]) + res_json = json.dumps({} if kwargs['new_mixer_api'] else []) return MockResponse(json.dumps({'payload': res_json})) # Mock responses for urlopen request to get_observations @@ -103,7 +106,7 @@ def read(self): and data['measurement_method'] == 'BLSSeasonallyAdjusted': if data['dcids'] == ['dc/p/x6t44d8jd95rd', 'dc/p/lr52m1yr46r44', 'dc/p/fs929fynprzs']: # Response returned when querying for multiple valid dcids. - res_json = json.dumps([ + res_json = json.dumps({'dc/p/x6t44d8jd95rd': '18704962.000000', 'dc/p/lr52m1yr46r44': '3075662.000000', 'dc/p/fs929fynprzs': '1973955.000000'}) if kwargs['new_mixer_api'] else json.dumps([ { 'dcid': 'dc/p/x6t44d8jd95rd', 'observation': '18704962.000000' @@ -120,7 +123,7 @@ def read(self): return MockResponse(json.dumps({'payload': res_json})) if data['dcids'] == ['dc/p/x6t44d8jd95rd', 'dc/MadDcid']: # Response returned when querying for a dcid that does not exist. - res_json = json.dumps([ + res_json = json.dumps({'dc/p/x6t44d8jd95rd' : '18704962.000000'}) if kwargs['new_mixer_api'] else json.dumps([ { 'dcid': 'dc/p/x6t44d8jd95rd', 'observation': '18704962.000000' @@ -130,7 +133,7 @@ def read(self): if data['dcids'] == ['dc/MadDcid', 'dc/MadderDcid'] or data['dcids'] == []: # Response returned when both given dcids do not exist or no dcids are # provided to the method. - res_json = json.dumps([]) + res_json = json.dumps({} if kwargs['new_mixer_api'] else []) return MockResponse(json.dumps({'payload': res_json})) # Mock responses for urlopen request to get_place_obs @@ -203,6 +206,12 @@ def read(self): # Otherwise, return an empty response and a 404. return urllib.error.HTTPError(None, 404, None, None, None) +def request_mock(*args, **kwargs): + return request_mock_base(new_mixer_api=False, *args, **kwargs) + +def request_mock_new_mixer_api(*args, **kwargs): + return request_mock_base(new_mixer_api=True, *args, **kwargs) + class TestGetPopulations(unittest.TestCase): """ Unit tests for get_populations. """ @@ -225,6 +234,19 @@ def test_multiple_dcids(self, urlopen): 'geoId/4805000': 'dc/p/f3q9whmjwbf36' }) + @mock.patch('urllib.request.urlopen', side_effect=request_mock_new_mixer_api) + def test_multiple_dcids_new_mixer_api(self, urlopen): + """ Calling get_populations with proper dcids returns valid results. """ + # Set the API key + dc.set_api_key('TEST-API-KEY') + + # Call get_populations + populations = dc.get_populations(['geoId/06085', 'geoId/4805000'], 'Person', + constraining_properties=self._constraints) + self.assertDictEqual(populations, { + 'geoId/06085': 'dc/p/crgfn8blpvl35', + 'geoId/4805000': 'dc/p/f3q9whmjwbf36' + }) @mock.patch('urllib.request.urlopen', side_effect=request_mock) def test_bad_dcids(self, urlopen): @@ -244,6 +266,24 @@ def test_bad_dcids(self, urlopen): self.assertDictEqual(pops_1, {'geoId/06085': 'dc/p/crgfn8blpvl35'}) self.assertDictEqual(pops_2, {}) + @mock.patch('urllib.request.urlopen', side_effect=request_mock_new_mixer_api) + def test_bad_dcids_new_mixer_api(self, urlopen): + """ Calling get_populations with dcids that do not exist returns empty + results. + """ + # Set the API key + dc.set_api_key('TEST-API-KEY') + + # Call get_populations + pops_1 = dc.get_populations(['geoId/06085', 'dc/MadDcid'], 'Person', + constraining_properties=self._constraints) + pops_2 = dc.get_populations(['dc/MadDcid', 'dc/MadderDcid'], 'Person', + constraining_properties=self._constraints) + + # Verify the results + self.assertDictEqual(pops_1, {'geoId/06085': 'dc/p/crgfn8blpvl35'}) + self.assertDictEqual(pops_2, {}) + @mock.patch('urllib.request.urlopen', side_effect=request_mock) def test_no_dcids(self, urlopen): """ Calling get_populations with no dcids returns empty results. """ @@ -254,6 +294,16 @@ def test_no_dcids(self, urlopen): [], 'Person', constraining_properties=self._constraints) self.assertDictEqual(pops, {}) + @mock.patch('urllib.request.urlopen', side_effect=request_mock_new_mixer_api) + def test_no_dcids_with_new_mixer_api(self, urlopen): + """ Calling get_populations with no dcids returns empty results. """ + # Set the API key + dc.set_api_key('TEST-API-KEY') + + pops = dc.get_populations( + [], 'Person', constraining_properties=self._constraints) + self.assertDictEqual(pops, {}) + class TestGetObservations(unittest.TestCase): """ Unit tests for get_observations. """ @@ -274,6 +324,23 @@ def test_multiple_dcids(self, urlopen): measurement_method='BLSSeasonallyAdjusted') self.assertDictEqual(actual, expected) + @mock.patch('urllib.request.urlopen', side_effect=request_mock_new_mixer_api) + def test_multiple_dcids_with_new_mixer_api(self, urlopen): + """ Calling get_observations with proper dcids returns valid results. """ + # Set the API key + dc.set_api_key('TEST-API-KEY') + + dcids = ['dc/p/x6t44d8jd95rd', 'dc/p/lr52m1yr46r44', 'dc/p/fs929fynprzs'] + expected = { + 'dc/p/lr52m1yr46r44': 3075662.0, + 'dc/p/fs929fynprzs': 1973955.0, + 'dc/p/x6t44d8jd95rd': 18704962.0 + } + actual = dc.get_observations(dcids, 'count', 'measuredValue', '2018-12', + observation_period='P1M', + measurement_method='BLSSeasonallyAdjusted') + self.assertDictEqual(actual, expected) + @mock.patch('urllib.request.urlopen', side_effect=request_mock) def test_bad_dcids(self, urlopen): """ Calling get_observations with dcids that do not exist returns empty @@ -298,6 +365,30 @@ def test_bad_dcids(self, urlopen): self.assertDictEqual(actual_1, {'dc/p/x6t44d8jd95rd': 18704962.0}) self.assertDictEqual(actual_2, {}) + @mock.patch('urllib.request.urlopen', side_effect=request_mock_new_mixer_api) + def test_bad_dcids_new_mixer_api(self, urlopen): + """ Calling get_observations with dcids that do not exist returns empty + results. + """ + # Set the API key + dc.set_api_key('TEST-API-KEY') + + # Get the input + dcids_1 = ['dc/p/x6t44d8jd95rd', 'dc/MadDcid'] + dcids_2 = ['dc/MadDcid', 'dc/MadderDcid'] + + # Call get_observations + actual_1 = dc.get_observations(dcids_1, 'count', 'measuredValue', '2018-12', + observation_period='P1M', + measurement_method='BLSSeasonallyAdjusted') + actual_2 = dc.get_observations(dcids_2, 'count', 'measuredValue', '2018-12', + observation_period='P1M', + measurement_method='BLSSeasonallyAdjusted') + + # Verify the results + self.assertDictEqual(actual_1, {'dc/p/x6t44d8jd95rd': 18704962.0}) + self.assertDictEqual(actual_2, {}) + @mock.patch('urllib.request.urlopen', side_effect=request_mock) def test_no_dcids(self, urlopen): """ Calling get_observations with no dcids returns empty results. """ @@ -309,6 +400,17 @@ def test_no_dcids(self, urlopen): measurement_method='BLSSeasonallyAdjusted') self.assertDictEqual(actual, {}) + @mock.patch('urllib.request.urlopen', side_effect=request_mock_new_mixer_api) + def test_no_dcids_new_mixer_api(self, urlopen): + """ Calling get_observations with no dcids returns empty results. """ + # Set the API key + dc.set_api_key('TEST-API-KEY') + + actual = dc.get_observations([], 'count', 'measuredValue', '2018-12', + observation_period='P1M', + measurement_method='BLSSeasonallyAdjusted') + self.assertDictEqual(actual, {}) + class TestGetPopObs(unittest.TestCase): """ Unit tests for get_pop_obs. """ diff --git a/datacommons/utils.py b/datacommons/utils.py index cce959bd..db9c6000 100644 --- a/datacommons/utils.py +++ b/datacommons/utils.py @@ -21,6 +21,7 @@ from __future__ import print_function from collections import defaultdict +from collections import Mapping import base64 import json @@ -129,13 +130,24 @@ def _send_request(req_url, req_json={}, compress=False, post=True): def _format_expand_payload(payload, new_key, must_exist=[]): """ Formats expand type payloads into dicts from dcids to lists of values. """ # Create the results dictionary from payload - results = defaultdict(set) - for entry in payload: - if 'dcid' in entry and new_key in entry: - dcid = entry['dcid'] - results[dcid].add(entry[new_key]) - - # Ensure all dcids in must_exist have some entry in results. - for dcid in must_exist: - results[dcid] - return {k: sorted(list(v)) for k, v in results.items()} \ No newline at end of file + # If we have changed the mixer API so that it returns a dict mapping + # from dcid to new_key value, we unpack the payload differently. + # TODO: delete this check and change this function so it doesn't use + # new_key and doesn't sort the values of results. + if isinstance(payload, Mapping): + results = {} + for dcid in must_exist: + results[dcid] = [] + for dcid, value in payload.items(): + results[dcid] = sorted(value) if isinstance(value, list) else [value] + return results + else: + results = defaultdict(set) + for entry in payload: + if 'dcid' in entry and new_key in entry: + dcid = entry['dcid'] + results[dcid].add(entry[new_key]) + # Ensure all dcids in must_exist have some entry in results. + for dcid in must_exist: + results[dcid] + return {k: sorted(list(v)) for k, v in results.items()}