datacommonsorg · tjann · Aug 26, 2020 · Aug 23, 2020 · Aug 24, 2020 · Aug 24, 2020
diff --git a/datacommons/examples/stat_vars.py b/datacommons/examples/stat_vars.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Basic examples for StatisticalVariable-based param_set Commons API functions."""
+"""Basic examples for StatisticalVariable-based param_set Data Commons API functions."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -25,16 +25,16 @@ def main():
     param_sets = [
         {
             'place': 'geoId/06085',
-            'stat_var': 'Count_Person',
+            'stat_var': "Count_Person",
         },
         {
             'place': 'geoId/06085',
-            'stat_var': 'Count_Person',
+            'stat_var': "Count_Person",
             'date': '2018',
         },
         {
             'place': 'geoId/06085',
-            'stat_var': 'Count_Person',
+            'stat_var': "Count_Person",
             'date': '2018',
             'measurement_method': 'CensusACS5yrSurvey',
         },
@@ -111,20 +111,20 @@ def call_str(pvs):
 
     pp = pprint.PrettyPrinter(indent=4)
     print(
-        "\nget_stat_all(['geoId/06085', 'country/FRA'], ['Median_Age_Person', 'Count_Person'])"
+        '\nget_stat_all(["geoId/06085", "country/FRA"], ["Median_Age_Person", "Count_Person"])'
     )
     print('>>> ')
     pp.pprint(
-        dc.get_stat_all(['geoId/06085', 'country/FRA'],
-                        ['Median_Age_Person', 'Count_Person']))
+        dc.get_stat_all(["geoId/06085", "country/FRA"],
+                        ["Median_Age_Person", "Count_Person"]))
 
     print(
-        "\nget_stat_all(['badPlaceId', 'country/FRA'], ['Median_Age_Person', 'Count_Person'])"
+        '\nget_stat_all(["badPlaceId", "country/FRA"], ["Median_Age_Person", "Count_Person"])'
     )
     print('>>> ')
     pp.pprint(
-        dc.get_stat_all(['badPlaceId', 'country/FRA'],
-                        ['Median_Age_Person', 'Count_Person']))
+        dc.get_stat_all(["badPlaceId", "country/FRA"],
+                        ["Median_Age_Person", "Count_Person"]))
 
 
 if __name__ == '__main__':

diff --git a/datacommons/stat_vars.py b/datacommons/stat_vars.py
@@ -20,13 +20,8 @@
 from __future__ import division
 from __future__ import print_function
 
-from datacommons.utils import _API_ROOT, _API_ENDPOINTS, _ENV_VAR_API_KEY
-
 import collections
-import json
-import os
-import six.moves.urllib.error
-import six.moves.urllib.request
+import six
 
 import datacommons.utils as utils
 
@@ -148,55 +143,62 @@ def get_stat_all(places, stat_vars):
       >>> get_stat_all(["geoId/05", "geoId/06"], ["Count_Person", "Count_Person_Male"])
       {
         "geoId/05": {
-          "Count_Person": [
-            {
-              "val": {
-                "2010": 1633,
-                "2011": 1509,
-                "2012": 1581,
-              },
-              "observationPeriod": "P1Y",
-              "importName": "Wikidata",
-              "provenanceDomain": "wikidata.org"
-            },
-            {
-              "val": {
-                "2010": 1333,
-                "2011": 1309,
-                "2012": 131,
+          "Count_Person": {
+            "sourceSeries": [
+              {
+                "val": {
+                  "2010": 1633,
+                  "2011": 1509,
+                  "2012": 1581,
+                },
+                "observationPeriod": "P1Y",
+                "importName": "Wikidata",
+                "provenanceDomain": "wikidata.org"
               },
-              "observationPeriod": "P1Y",
-              "importName": "CensusPEPSurvey",
-              "provenanceDomain": "census.gov"
+              {
+                "val": {
+                  "2010": 1333,
+                  "2011": 1309,
+                  "2012": 131,
+                },
+                "observationPeriod": "P1Y",
+                "importName": "CensusPEPSurvey",
+                "provenanceDomain": "census.gov"
+              }
+            ],
             }
-          ],
-          "Count_Person_Male": [
-            {
-              "val": {
-                "2010": 1633,
-                "2011": 1509,
-                "2012": 1581,
-              },
-              "observationPeriod": "P1Y",
-              "importName": "CensusPEPSurvey",
-              "provenanceDomain": "census.gov"
-            }
-          ],
+          },
+          "Count_Person_Male": {
+            "sourceSeries": [
+              {
+                "val": {
+                  "2010": 1633,
+                  "2011": 1509,
+                  "2012": 1581,
+                },
+                "observationPeriod": "P1Y",
+                "importName": "CensusPEPSurvey",
+                "provenanceDomain": "census.gov"
+              }
+            ],
+          }
         },
         "geoId/02": {
-          "Count_Person": [],
-          "Count_Person_Male": [
-            {
-              "val": {
-                "2010": 13,
-                "2011": 13,
-                "2012": 322,
-              },
-              "observationPeriod": "P1Y",
-              "importName": "CensusPEPSurvey",
-              "provenanceDomain": "census.gov"
+          "Count_Person": {},
+          "Count_Person_Male": {
+              "sourceSeries": [
+                {
+                  "val": {
+                    "2010": 13,
+                    "2011": 13,
+                    "2012": 322,
+                  },
+                  "observationPeriod": "P1Y",
+                  "importName": "CensusPEPSurvey",
+                  "provenanceDomain": "census.gov"
+                }
+              ]
             }
-          ],
         }
       }
     """

diff --git a/datacommons/test/stat_vars_test.py b/datacommons/test/stat_vars_test.py
@@ -29,34 +29,42 @@
 import datacommons.utils as utils
 import json
 import unittest
+import six
 import six.moves.urllib as urllib
 
 # Reusable parts of REST API /stat/all response.
 CA_COUNT_PERSON = {
     "isDcAggregate":
         "true",
-    "sourceSeries": [
-        {
-            "val": {
-                "1990": 23640,
-                "1991": 24100,
-                "1992": 25090,
-            },
-            "observationPeriod": "P1Y",
-            "importName": "WorldDevelopmentIndicators",
-            "provenanceDomain": "worldbank.org"
+    "sourceSeries": [{
+        "val": {
+            "1990": 23640,
+            "1991": 24100,
+            "1993": 25090,
         },
-        {
-            "val": {
-                "1790": 3929214,
-                "1800": 5308483,
-                "1810": 7239881,
-            },
-            "measurementMethod": "WikidataPopulation",
-            "importName": "WikidataPopulation",
-            "provenanceDomain": "wikidata.org"
+        "observationPeriod": "P1Y",
+        "importName": "WorldDevelopmentIndicators",
+        "provenanceDomain": "worldbank.org"
+    }, {
+        "val": {
+            "1790": 3929214,
+            "1800": 5308483,
+            "1810": 7239881,
+        },
+        "measurementMethod": "WikidataPopulation",
+        "importName": "WikidataPopulation",
+        "provenanceDomain": "wikidata.org"
+    }, {
+        "val": {
+            "1890": 28360,
+            "1891": 24910,
+            "1892": 25070,
         },
-    ]
+        "measurementMethod": "OECDRegionalStatistics",
+        "observationPeriod": "P1Y",
+        "importName": "OECDRegionalDemography",
+        "provenanceDomain": "oecd.org"
+    }]
 }
 
 CA_COUNT_PERSON_MALE = {
@@ -100,7 +108,7 @@
     }]
 }
 
-HU22_MEDIAN_AGE_PERSON = {
+CA_MEDIAN_AGE_PERSON = {
     "sourceSeries": [{
         "val": {
             "1990": 12,
@@ -138,37 +146,37 @@ def read(self):
     if req.get_full_url(
     ) == stat_value_url_base + '?place=geoId/06&stat_var=Count_Person':
         # Response returned when querying with basic args.
-        return MockResponse(json.dumps({'value': 123}))
+        return MockResponse(json.dumps({"value": 123}))
     if req.get_full_url(
     ) == stat_value_url_base + '?place=geoId/06&stat_var=Count_Person&date=2010':
         # Response returned when querying with observationDate.
-        return MockResponse(json.dumps({'value': 133}))
+        return MockResponse(json.dumps({"value": 133}))
     if (req.get_full_url() == stat_value_url_base +
             '?place=geoId/06&stat_var=Count_Person&' +
             'date=2010&measurement_method=CensusPEPSurvey&' +
             'observation_period=P1Y&unit=RealPeople&scaling_factor=100'):
         # Response returned when querying with above optional params.
-        return MockResponse(json.dumps({'value': 103}))
+        return MockResponse(json.dumps({"value": 103}))
 
     # Mock responses for urlopen requests to get_stat_series.
     if req.get_full_url(
     ) == stat_series_url_base + '?place=geoId/06&stat_var=Count_Person':
         # Response returned when querying with basic args.
-        return MockResponse(json.dumps({'series': {'2000': 1, '2001': 2}}))
+        return MockResponse(json.dumps({"series": {"2000": 1, "2001": 2}}))
     if (req.get_full_url() == stat_series_url_base +
             '?place=geoId/06&stat_var=Count_Person&' +
             'measurement_method=CensusPEPSurvey&observation_period=P1Y&' +
             'unit=RealPeople&scaling_factor=100'):
 
         # Response returned when querying with above optional params.
-        return MockResponse(json.dumps({'series': {'2000': 3, '2001': 42}}))
+        return MockResponse(json.dumps({"series": {"2000": 3, "2001": 42}}))
     if (req.get_full_url() == stat_series_url_base +
             '?place=geoId/06&stat_var=Count_Person&' +
             'measurement_method=DNE'):
 
         # Response returned when data not available for optional parameters.
         # /stat/series?place=geoId/06&stat_var=Count_Person&measurement_method=DNE
-        return MockResponse(json.dumps({'series': {}}))
+        return MockResponse(json.dumps({"series": {}}))
 
     # Mock responses for urlopen requests to get_stat_all.
     if req.get_full_url() == stat_all_url_base:
@@ -204,7 +212,7 @@ def read(self):
                     "geoId/06": {
                         "statVarData": {
                             "Count_Person": CA_COUNT_PERSON,
-                            "Median_Age_Person": HU22_MEDIAN_AGE_PERSON
+                            "Median_Age_Person": CA_MEDIAN_AGE_PERSON
                         }
                     },
                     "nuts/HU22": {
@@ -274,7 +282,7 @@ def test_basic(self, urlopen):
         """Calling get_stat_value with minimal and proper args."""
         # Call get_stat_series
         stats = dc.get_stat_series('geoId/06', 'Count_Person')
-        self.assertEqual(stats, {'2000': 1, '2001': 2})
+        self.assertEqual(stats, {"2000": 1, "2001": 2})
 
     @patch('six.moves.urllib.request.urlopen', side_effect=request_mock)
     def test_opt_args(self, urlopen):
@@ -283,7 +291,7 @@ def test_opt_args(self, urlopen):
         # Call get_stat_series with all optional args
         stats = dc.get_stat_series('geoId/06', 'Count_Person',
                                    'CensusPEPSurvey', 'P1Y', 'RealPeople', 100)
-        self.assertEqual(stats, {'2000': 3, '2001': 42})
+        self.assertEqual(stats, {"2000": 3, "2001": 42})
 
         # Call get_stat_series with non-satisfiable optional args
         stats = dc.get_stat_series('geoId/06', 'Count_Person', 'DNE')
@@ -316,7 +324,7 @@ def test_basic(self, urlopen):
         exp = {
             "geoId/06": {
                 "Count_Person": CA_COUNT_PERSON,
-                "Median_Age_Person": HU22_MEDIAN_AGE_PERSON
+                "Median_Age_Person": CA_MEDIAN_AGE_PERSON
             },
             "nuts/HU22": {
                 "Count_Person": HU22_COUNT_PERSON,

diff --git a/dcpandas/CHANGELOG.md b/dcpandas/CHANGELOG.md
@@ -0,0 +1,21 @@
+# Changelog
+
+## 0.0.1
+
+**Date** - 08/24/2020
+
+**Release Tag** - [pd0.0.1](https://github.com/datacommonsorg/api-python/releases/tag/pd0.0.1)
+
+**Release Status** - Current head of branch [`master`](https://github.com/datacommonsorg/api-python/tree/master)
+
+Added pandas wrapper functions.
+
+-   `build_time_series` will construct a pd.Series for a given StatisticalVariable and Place, where dates are the index for the time series.
+-   `build_time_series_dataframe` will construct a pd.DataFrame for a given StatisticalVariable and a set of Places, where Places are the index and dates are the columns.
+-   `build_covariate_dataframe` will construct a covariate pd.DataFrame for a set of StatisticalVariables and a set of Places, with Places as the index and StatisticalVariables as the columns. The values are the most recent values for the chosen StatVarObservation options.
+
+For multi-place functions, when a StatisticalVariable has multiple StatVarObservation options,
+Data Commons chooses a set of StatVarObservation options that covers the most geos. This
+ensures that the data fetched for a StatisticalVariable is comparable across places.
+When there is a tie, we select the set of StatVarObservation options with the latest date
+for which data is available for any place.
diff --git a/dcpandas/README.md b/dcpandas/README.md
@@ -0,0 +1,47 @@
+# Data Commons Pandas API
+
+This is a Python library for creating pandas objects with data in the
+Data Commons Graph.
+To get started, install this package from pip.
+
+    pip install dcpandas
+
+Once the package is installed, import `dcpandas`.
+
+    import dcpandas as dcpd
+
+For more detail on getting started with the API, please visit our
+[API Overview](http://docs.datacommons.org/api/).
+
+Once you're ready to use the API, refer to `dcpandas/examples` for
+examples of how to use this package to perform various tasks. More tutorials and
+documentation can be found on our [tutorials](https://datacommons.org/colab) page!
+
+## About Data Commons
+
+[Data Commons](https://datacommons.org/) is an open knowledge repository that
+provides a unified view across multiple public data sets and statistics. You can
+view what [datasets](https://datacommons.org/datasets) are currently ingested
+and browse the graph using our [browser](https://browser.datacommons.org/).
+
+## License
+
+Apache 2.0
+
+## Development
+
+Please follow the Development instructions from the root directory.
+
+## Release to PyPI
+
+- Update "VERSION" in setup.py
+- Update CHANGELOG.md for a new version
+- Upload a new package using steps for [generating distribution archives](https://packaging.python.org/tutorials/packaging-projects/#generating-distribution-archives) and [uploading the distribution archives](https://packaging.python.org/tutorials/packaging-projects/#uploading-the-distribution-archives)
+
+## Support
+
+For general questions or issues about the API, please open an issue on our
+[issues](https://github.com/datacommonsorg/api-python/issues) page. For all other
+questions, please send an email to `support@datacommons.org`.
+
+**Note** - This is not an officially supported Google product.