From e05e404e58f3696aa4bfa19ba739db51743e8c09 Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Sun, 23 Aug 2020 13:33:14 -0700
Subject: [PATCH 01/35] Add pandas wrapper function for time series data frame.

---
 datacommons/__init__.py            |  2 +-
 datacommons/examples/stat_vars.py  |  9 ++++
 datacommons/stat_vars.py           | 55 +++++++++++++++++++++---
 datacommons/test/stat_vars_test.py | 69 ++++++++++++++++++++++++++++++
 4 files changed, 128 insertions(+), 7 deletions(-)

diff --git a/datacommons/__init__.py b/datacommons/__init__.py
index da2f9fc4..37219cca 100644
--- a/datacommons/__init__.py
+++ b/datacommons/__init__.py
@@ -19,7 +19,7 @@
 from datacommons.core import get_property_labels, get_property_values, get_triples
 from datacommons.places import get_places_in, get_related_places, get_stats
 from datacommons.populations import get_populations, get_observations, get_pop_obs, get_place_obs
-from datacommons.stat_vars import get_stat_value, get_stat_series, get_stat_all
+from datacommons.stat_vars import get_stat_value, get_stat_series, get_stat_all, records_place_by_time
 
 # Other utilities
 from .utils import set_api_key
diff --git a/datacommons/examples/stat_vars.py b/datacommons/examples/stat_vars.py
index b5e16f08..6ea6ca50 100644
--- a/datacommons/examples/stat_vars.py
+++ b/datacommons/examples/stat_vars.py
@@ -126,6 +126,15 @@ def call_str(pvs):
         dc.get_stat_all(['badPlaceId', 'country/FRA'],
                         ['Median_Age_Person', 'Count_Person']))
 
+    print(
+        'dc.records_place_by_time(["geoId/29", "geoId/33"], "CumulativeCount_MedicalConditionIncident_COVID_19_PatientDeceased")'
+    )
+    print('>>> ')
+    pp.pprint(
+        dc.records_place_by_time([
+            "geoId/29", "geoId/33"
+        ], "CumulativeCount_MedicalConditionIncident_COVID_19_PatientDeceased"))
+
 
 if __name__ == '__main__':
     main()
diff --git a/datacommons/stat_vars.py b/datacommons/stat_vars.py
index ec07569e..306c4019 100644
--- a/datacommons/stat_vars.py
+++ b/datacommons/stat_vars.py
@@ -20,13 +20,8 @@
 from __future__ import division
 from __future__ import print_function
 
-from datacommons.utils import _API_ROOT, _API_ENDPOINTS, _ENV_VAR_API_KEY
-
 import collections
-import json
-import os
-import six.moves.urllib.error
-import six.moves.urllib.request
+import six
 
 import datacommons.utils as utils
 
@@ -215,3 +210,51 @@ def get_stat_all(places, stat_vars):
         for stat_var_dcid, stat_var in place['statVarData'].items():
             place_statvar_series[place_dcid][stat_var_dcid] = stat_var
     return dict(place_statvar_series)
+
+
+# Pandas Helpers
+# These functions are wrapper functions that create Python data structures
+# that are easily converted to Pandas DataFrames (and Series).
+
+
+def records_place_by_time(places, stat_var):
+    """Returns a `list` of `dict` per element of `places` based on the `stat_var`.
+
+    Args:
+      places (`str` or `iterable` of `str`): The dcid of Places to query for.
+      stat_var (`str`): The dcid of the StatisticalVariable.
+    Returns:
+      A `list` of `dict`, one per element of `places`. Each `dict` consists of
+      the time series and place identifier.
+
+    Raises:
+      ValueError: If the payload returned by the Data Commons REST API is
+        malformed.
+
+    Examples:
+      >>> records_place_by_time(["geoId/29", "geoId/33"], "Count_Person")
+          [
+            {'2020-03-07': 20, '2020-03-08': 40, 'place': 'geoId/29'},
+            {'2020-08-21': 428, '2020-08-22': 429, 'place': 'geoId/33'}
+          ]
+    """
+    try:
+        if isinstance(places, six.string_types):
+            places = [places]
+        else:
+            places = list(places)
+    except:
+        raise ValueError(
+            'Parameter `places` must a string object or list-like object.')
+    if not isinstance(stat_var, six.string_types):
+        raise ValueError('Parameter `stat_var` must be a string.')
+
+    stat_all = get_stat_all(places, [stat_var])
+    # Use the first time series result of each Place+StatVar pair.
+    # Create a list of rows to be passed into pd.DataFrame.from_records
+    rows = [
+        dict({'place': place},
+             **data[next(iter(data))]['sourceSeries'][0]['val'])
+        for place, data in stat_all.items()
+    ]
+    return rows
\ No newline at end of file
diff --git a/datacommons/test/stat_vars_test.py b/datacommons/test/stat_vars_test.py
index 16ae7693..aba31a95 100644
--- a/datacommons/test/stat_vars_test.py
+++ b/datacommons/test/stat_vars_test.py
@@ -217,6 +217,39 @@ def read(self):
             }
             return MockResponse(json.dumps(resp))
 
+        if (data['places'] == ['geoId/06', 'nuts/HU22'] and
+                data['stat_vars'] == ['Count_Person']):
+            # Response returned when querying with above params.
+            resp = {
+                "placeData": {
+                    "geoId/06": {
+                        "statVarData": {
+                            "Count_Person": CA_COUNT_PERSON,
+                        }
+                    },
+                    "nuts/HU22": {
+                        "statVarData": {
+                            "Count_Person": HU22_COUNT_PERSON,
+                        }
+                    }
+                }
+            }
+            return MockResponse(json.dumps(resp))
+
+        if (data['places'] == ['geoId/06'] and
+                data['stat_vars'] == ['Count_Person']):
+            # Response returned when querying with above params.
+            resp = {
+                "placeData": {
+                    "geoId/06": {
+                        "statVarData": {
+                            "Count_Person": CA_COUNT_PERSON,
+                        }
+                    }
+                }
+            }
+            return MockResponse(json.dumps(resp))
+
         if (data['places'] == ['badPlaceId', 'nuts/HU22'] and
                 data['stat_vars'] == ['Count_Person', 'badStatVarId']):
             # Response returned when querying with above params.
@@ -342,5 +375,41 @@ def test_bad_dcids(self, urlopen):
         self.assertDictEqual(stats, exp)
 
 
+class TestRecordsPlaceByTime(unittest.TestCase):
+    """Unit tests for records_place_by_time."""
+
+    @patch('six.moves.urllib.request.urlopen', side_effect=request_mock)
+    def test_basic(self, urlopen):
+        """Calling records_place_by_time with proper args."""
+        # Expecting at least one TS per Place+StatVar
+        rows = dc.records_place_by_time(['geoId/06', 'nuts/HU22'],
+                                        'Count_Person')
+        exp = [{
+            "1990": 23640,
+            "1991": 24100,
+            "1992": 25090,
+            "place": "geoId/06"
+        }, {
+            "1990": 2360,
+            "1991": 2410,
+            "1992": 2500,
+            "place": "nuts/HU22"
+        }]
+        self.assertEqual(rows, exp)
+
+    @patch('six.moves.urllib.request.urlopen', side_effect=request_mock)
+    def test_tolerate_place_string(self, urlopen):
+        """Calling records_place_by_time with proper args."""
+        # Expecting at least one TS per Place+StatVar
+        rows = dc.records_place_by_time('geoId/06', 'Count_Person')
+        exp = [{
+            "1990": 23640,
+            "1991": 24100,
+            "1992": 25090,
+            "place": "geoId/06"
+        }]
+        self.assertEqual(rows, exp)
+
+
 if __name__ == '__main__':
     unittest.main()

From 2f070fdeb520f0ea9bdd493729782ad48d12cf6c Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Sun, 23 Aug 2020 17:41:27 -0700
Subject: [PATCH 02/35] Save work so far on pandas.

---
 datacommons/__init__.py            |   2 +-
 datacommons/examples/stat_vars.py  |   4 +-
 datacommons/stat_vars.py           | 175 +++++++++++++++++++----------
 datacommons/test/stat_vars_test.py |  30 ++---
 4 files changed, 133 insertions(+), 78 deletions(-)

diff --git a/datacommons/__init__.py b/datacommons/__init__.py
index 37219cca..e8059dc2 100644
--- a/datacommons/__init__.py
+++ b/datacommons/__init__.py
@@ -19,7 +19,7 @@
 from datacommons.core import get_property_labels, get_property_values, get_triples
 from datacommons.places import get_places_in, get_related_places, get_stats
 from datacommons.populations import get_populations, get_observations, get_pop_obs, get_place_obs
-from datacommons.stat_vars import get_stat_value, get_stat_series, get_stat_all, records_place_by_time
+from datacommons.stat_vars import get_stat_value, get_stat_series, get_stat_all, time_series_pd_input
 
 # Other utilities
 from .utils import set_api_key
diff --git a/datacommons/examples/stat_vars.py b/datacommons/examples/stat_vars.py
index 6ea6ca50..2de03727 100644
--- a/datacommons/examples/stat_vars.py
+++ b/datacommons/examples/stat_vars.py
@@ -127,11 +127,11 @@ def call_str(pvs):
                         ['Median_Age_Person', 'Count_Person']))
 
     print(
-        'dc.records_place_by_time(["geoId/29", "geoId/33"], "CumulativeCount_MedicalConditionIncident_COVID_19_PatientDeceased")'
+        'dc.cohort_time_series(["geoId/29", "geoId/33"], "CumulativeCount_MedicalConditionIncident_COVID_19_PatientDeceased")'
     )
     print('>>> ')
     pp.pprint(
-        dc.records_place_by_time([
+        dc.cohort_time_series([
             "geoId/29", "geoId/33"
         ], "CumulativeCount_MedicalConditionIncident_COVID_19_PatientDeceased"))
 
diff --git a/datacommons/stat_vars.py b/datacommons/stat_vars.py
index 306c4019..0f33ae9f 100644
--- a/datacommons/stat_vars.py
+++ b/datacommons/stat_vars.py
@@ -143,55 +143,62 @@ def get_stat_all(places, stat_vars):
       >>> get_stat_all(["geoId/05", "geoId/06"], ["Count_Person", "Count_Person_Male"])
       {
         "geoId/05": {
-          "Count_Person": [
-            {
-              "val": {
-                "2010": 1633,
-                "2011": 1509,
-                "2012": 1581,
+          "Count_Person": {
+            "sourceSeries": [
+              {
+                "val": {
+                  "2010": 1633,
+                  "2011": 1509,
+                  "2012": 1581,
+                },
+                "observationPeriod": "P1Y",
+                "importName": "Wikidata",
+                "provenanceDomain": "wikidata.org"
               },
-              "observationPeriod": "P1Y",
-              "importName": "Wikidata",
-              "provenanceDomain": "wikidata.org"
-            },
-            {
-              "val": {
-                "2010": 1333,
-                "2011": 1309,
-                "2012": 131,
-              },
-              "observationPeriod": "P1Y",
-              "importName": "CensusPEPSurvey",
-              "provenanceDomain": "census.gov"
-            }
-          ],
-          "Count_Person_Male": [
-            {
-              "val": {
-                "2010": 1633,
-                "2011": 1509,
-                "2012": 1581,
-              },
-              "observationPeriod": "P1Y",
-              "importName": "CensusPEPSurvey",
-              "provenanceDomain": "census.gov"
+              {
+                "val": {
+                  "2010": 1333,
+                  "2011": 1309,
+                  "2012": 131,
+                },
+                "observationPeriod": "P1Y",
+                "importName": "CensusPEPSurvey",
+                "provenanceDomain": "census.gov"
+              }
+            ],
             }
-          ],
+          },
+          "Count_Person_Male": {
+            "sourceSeries": [
+              {
+                "val": {
+                  "2010": 1633,
+                  "2011": 1509,
+                  "2012": 1581,
+                },
+                "observationPeriod": "P1Y",
+                "importName": "CensusPEPSurvey",
+                "provenanceDomain": "census.gov"
+              }
+            ],
+          }
         },
         "geoId/02": {
-          "Count_Person": [],
-          "Count_Person_Male": [
-            {
-              "val": {
-                "2010": 13,
-                "2011": 13,
-                "2012": 322,
-              },
-              "observationPeriod": "P1Y",
-              "importName": "CensusPEPSurvey",
-              "provenanceDomain": "census.gov"
+          "Count_Person": {},
+          "Count_Person_Male": {
+              "sourceSeries": [
+                {
+                  "val": {
+                    "2010": 13,
+                    "2011": 13,
+                    "2012": 322,
+                  },
+                  "observationPeriod": "P1Y",
+                  "importName": "CensusPEPSurvey",
+                  "provenanceDomain": "census.gov"
+                }
+              ]
             }
-          ],
         }
       }
     """
@@ -217,11 +224,44 @@ def get_stat_all(places, stat_vars):
 # that are easily converted to Pandas DataFrames (and Series).
 
 
-def records_place_by_time(places, stat_var):
+def _get_first_time_series(stat_var_data):
+    """Helper function to return one time series."""
+    return stat_var_data['sourceSeries'][0]['val']
+
+
+def time_series_pd_input_options(places, stat_var):
+    """Returns a `dict` mapping StatVarObservation options to `list` of `dict` of time series for each Place.
+    """
+    res = collections.defaultdict(list)
+    stat_all = get_stat_all(places, [stat_var])
+    for place, place_data in stat_all.items():
+        if not place_data:
+            continue
+        stat_var_data = place_data[stat_var]
+        if not stat_var_data:
+            continue
+        for source_series in stat_var_data['sourceSeries']:
+            time_series = source_series['val']
+            # Hashable SVO options.
+            svo_options = (('measurementMethod',
+                            source_series.get('measurementMethod')),
+                           ('observationPeriod',
+                            source_series.get('observationPeriod')),
+                           ('unit', source_series.get('unit')),
+                           ('scalingFactor',
+                            source_series.get('scalingFactor')))
+            res[svo_options].append(dict({'place': place}, **time_series))
+    return dict(res)
+
+
+def time_series_pd_input(places, stat_var):
     """Returns a `list` of `dict` per element of `places` based on the `stat_var`.
 
+    Data Commons will pick a set of StatVarObservation options that covers the
+    maximum number of queried places.
+
     Args:
-      places (`str` or `iterable` of `str`): The dcid of Places to query for.
+      places (`str` or `iterable` of `str`): The dcids of Places to query for.
       stat_var (`str`): The dcid of the StatisticalVariable.
     Returns:
       A `list` of `dict`, one per element of `places`. Each `dict` consists of
@@ -232,7 +272,7 @@ def records_place_by_time(places, stat_var):
         malformed.
 
     Examples:
-      >>> records_place_by_time(["geoId/29", "geoId/33"], "Count_Person")
+      >>> time_series_pd_input(["geoId/29", "geoId/33"], "Count_Person")
           [
             {'2020-03-07': 20, '2020-03-08': 40, 'place': 'geoId/29'},
             {'2020-08-21': 428, '2020-08-22': 429, 'place': 'geoId/33'}
@@ -245,16 +285,37 @@ def records_place_by_time(places, stat_var):
             places = list(places)
     except:
         raise ValueError(
-            'Parameter `places` must a string object or list-like object.')
+            'Parameter `places` must be a string object or list-like object.')
     if not isinstance(stat_var, six.string_types):
         raise ValueError('Parameter `stat_var` must be a string.')
 
-    stat_all = get_stat_all(places, [stat_var])
-    # Use the first time series result of each Place+StatVar pair.
-    # Create a list of rows to be passed into pd.DataFrame.from_records
-    rows = [
-        dict({'place': place},
-             **data[next(iter(data))]['sourceSeries'][0]['val'])
-        for place, data in stat_all.items()
-    ]
-    return rows
\ No newline at end of file
+    rows_dict = time_series_pd_input_options(places, stat_var)
+    most_geos = []
+    max_geos_so_far = 0
+    latest_date = []
+    max_date_so_far = ''
+    for svo, rows in rows_dict.items():
+        current_geos = len(rows)
+        if current_geos > max_geos_so_far:
+            max_geos_so_far = current_geos
+            most_geos = [svo]
+            # Reset tiebreaker stats. Recompute after this if-else block.
+            latest_date = []
+            max_date_so_far = ''
+        elif current_geos == max_geos_so_far:
+            most_geos.append(svo)
+        else:
+            # Do not compute tiebreaker stats if not in most_geos.
+            continue
+        for row in rows:
+            dates = set(row.keys())
+            dates.remove('place')
+            row_max_date = max(dates)
+            if row_max_date > max_date_so_far:
+                max_date_so_far = row_max_date
+                latest_date = [svo]
+            elif row_max_date == max_date_so_far:
+                latest_date.append(svo)
+    for svo in most_geos:
+        if svo in latest_date:
+            return rows_dict[svo]
diff --git a/datacommons/test/stat_vars_test.py b/datacommons/test/stat_vars_test.py
index aba31a95..72faed60 100644
--- a/datacommons/test/stat_vars_test.py
+++ b/datacommons/test/stat_vars_test.py
@@ -29,6 +29,7 @@
 import datacommons.utils as utils
 import json
 import unittest
+import six
 import six.moves.urllib as urllib
 
 # Reusable parts of REST API /stat/all response.
@@ -40,7 +41,7 @@
             "val": {
                 "1990": 23640,
                 "1991": 24100,
-                "1992": 25090,
+                "1993": 25090,
             },
             "observationPeriod": "P1Y",
             "importName": "WorldDevelopmentIndicators",
@@ -375,37 +376,30 @@ def test_bad_dcids(self, urlopen):
         self.assertDictEqual(stats, exp)
 
 
-class TestRecordsPlaceByTime(unittest.TestCase):
-    """Unit tests for records_place_by_time."""
+class TestPdTimeSeries(unittest.TestCase):
+    """Unit tests for time_series_pd_input."""
 
     @patch('six.moves.urllib.request.urlopen', side_effect=request_mock)
     def test_basic(self, urlopen):
-        """Calling records_place_by_time with proper args."""
-        # Expecting at least one TS per Place+StatVar
-        rows = dc.records_place_by_time(['geoId/06', 'nuts/HU22'],
-                                        'Count_Person')
+        """Calling time_series_pd_input with proper args."""
+        rows = dc.time_series_pd_input(['geoId/06', 'nuts/HU22'],
+                                       'Count_Person')
         exp = [{
             "1990": 23640,
             "1991": 24100,
-            "1992": 25090,
+            "1993": 25090,
             "place": "geoId/06"
-        }, {
-            "1990": 2360,
-            "1991": 2410,
-            "1992": 2500,
-            "place": "nuts/HU22"
         }]
-        self.assertEqual(rows, exp)
+        six.assertCountEqual(self, rows, exp)
 
     @patch('six.moves.urllib.request.urlopen', side_effect=request_mock)
     def test_tolerate_place_string(self, urlopen):
-        """Calling records_place_by_time with proper args."""
-        # Expecting at least one TS per Place+StatVar
-        rows = dc.records_place_by_time('geoId/06', 'Count_Person')
+        """Calling time_series_pd_input with single string place arg."""
+        rows = dc.time_series_pd_input('geoId/06', 'Count_Person')
         exp = [{
             "1990": 23640,
             "1991": 24100,
-            "1992": 25090,
+            "1993": 25090,
             "place": "geoId/06"
         }]
         self.assertEqual(rows, exp)

From b1feeca25dd09056739ec6c3b7e97ac26766db58 Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Sun, 23 Aug 2020 19:49:49 -0700
Subject: [PATCH 03/35] Minor edits.

---
 datacommons/examples/stat_vars.py |  2 +-
 datacommons/stat_vars.py          | 24 ++++++++++++------------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/datacommons/examples/stat_vars.py b/datacommons/examples/stat_vars.py
index 2de03727..1128378e 100644
--- a/datacommons/examples/stat_vars.py
+++ b/datacommons/examples/stat_vars.py
@@ -131,7 +131,7 @@ def call_str(pvs):
     )
     print('>>> ')
     pp.pprint(
-        dc.cohort_time_series([
+        dc.time_series_pd_input([
             "geoId/29", "geoId/33"
         ], "CumulativeCount_MedicalConditionIncident_COVID_19_PatientDeceased"))
 
diff --git a/datacommons/stat_vars.py b/datacommons/stat_vars.py
index 0f33ae9f..e56ac18c 100644
--- a/datacommons/stat_vars.py
+++ b/datacommons/stat_vars.py
@@ -223,13 +223,12 @@ def get_stat_all(places, stat_vars):
 # These functions are wrapper functions that create Python data structures
 # that are easily converted to Pandas DataFrames (and Series).
 
+# def _get_first_time_series(stat_var_data):
+#     """Helper function to return one time series."""
+#     return stat_var_data['sourceSeries'][0]['val']
 
-def _get_first_time_series(stat_var_data):
-    """Helper function to return one time series."""
-    return stat_var_data['sourceSeries'][0]['val']
 
-
-def time_series_pd_input_options(places, stat_var):
+def _time_series_pd_input_options(places, stat_var):
     """Returns a `dict` mapping StatVarObservation options to `list` of `dict` of time series for each Place.
     """
     res = collections.defaultdict(list)
@@ -258,7 +257,8 @@ def time_series_pd_input(places, stat_var):
     """Returns a `list` of `dict` per element of `places` based on the `stat_var`.
 
     Data Commons will pick a set of StatVarObservation options that covers the
-    maximum number of queried places.
+    maximum number of queried places. Among ties, Data Commons selects an option
+    set with the latest Observation.
 
     Args:
       places (`str` or `iterable` of `str`): The dcids of Places to query for.
@@ -289,11 +289,11 @@ def time_series_pd_input(places, stat_var):
     if not isinstance(stat_var, six.string_types):
         raise ValueError('Parameter `stat_var` must be a string.')
 
-    rows_dict = time_series_pd_input_options(places, stat_var)
+    rows_dict = _time_series_pd_input_options(places, stat_var)
     most_geos = []
     max_geos_so_far = 0
     latest_date = []
-    max_date_so_far = ''
+    latest_date_so_far = ''
     for svo, rows in rows_dict.items():
         current_geos = len(rows)
         if current_geos > max_geos_so_far:
@@ -301,7 +301,7 @@ def time_series_pd_input(places, stat_var):
             most_geos = [svo]
             # Reset tiebreaker stats. Recompute after this if-else block.
             latest_date = []
-            max_date_so_far = ''
+            latest_date_so_far = ''
         elif current_geos == max_geos_so_far:
             most_geos.append(svo)
         else:
@@ -311,10 +311,10 @@ def time_series_pd_input(places, stat_var):
             dates = set(row.keys())
             dates.remove('place')
             row_max_date = max(dates)
-            if row_max_date > max_date_so_far:
-                max_date_so_far = row_max_date
+            if row_max_date > latest_date_so_far:
+                latest_date_so_far = row_max_date
                 latest_date = [svo]
-            elif row_max_date == max_date_so_far:
+            elif row_max_date == latest_date_so_far:
                 latest_date.append(svo)
     for svo in most_geos:
         if svo in latest_date:

From 516033b842a4b78e6ea825834c3d8f837ab65352 Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Sun, 23 Aug 2020 21:24:52 -0700
Subject: [PATCH 04/35] Add function for creating covariate pandas df.

---
 datacommons/stat_vars.py | 92 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)

diff --git a/datacommons/stat_vars.py b/datacommons/stat_vars.py
index e56ac18c..d6d8fb4d 100644
--- a/datacommons/stat_vars.py
+++ b/datacommons/stat_vars.py
@@ -319,3 +319,95 @@ def time_series_pd_input(places, stat_var):
     for svo in most_geos:
         if svo in latest_date:
             return rows_dict[svo]
+
+
+def _covariate_pd_input_options(places, stat_vars):
+    """Returns a `dict` mapping each stat_var to a `dict` of StatVarObservation options to `list` of `dict` of latest Observations for each Place. Note that the `observationDate` may differ across
+    Places, even if they share the same StatVarObservation options.
+    """
+    res = collections.defaultdict(lambda: collections.defaultdict(list))
+    stat_all = get_stat_all(places, stat_vars)
+    for place, place_data in stat_all.items():
+        if not place_data:
+            continue
+        for stat_var, stat_var_data in place_data.items():
+            if not stat_var_data:
+                continue
+            for source_series in stat_var_data['sourceSeries']:
+                time_series = source_series['val']
+                latest_obs = time_series[max(time_series)]
+                # Hashable SVO options.
+                svo_options = (('measurementMethod',
+                                source_series.get('measurementMethod')),
+                               ('observationPeriod',
+                                source_series.get('observationPeriod')),
+                               ('unit', source_series.get('unit')),
+                               ('scalingFactor',
+                                source_series.get('scalingFactor')))
+                res[stat_var][svo_options].append({
+                    'place': place,
+                    'val': latest_obs
+                })
+    return {k: dict(v) for k, v in res.items()}
+
+
+def covariate_pd_input(places, stat_vars):
+    """Returns a `list` of `dict` per element of `places` based on the `stat_vars`.
+
+    Data Commons will pick a set of StatVarObservation options that covers the
+    maximum number of queried places. Among ties, Data Commons selects an option
+    set with the latest Observation.
+
+    Args:
+      places (`str` or `iterable` of `str`): The dcids of Places to query for.
+      stat_vars (`str` or `iterable` of `str`): The dcids of the StatisticalVariables.
+    Returns:
+      A `list` of `dict`, one per element of `places`. Each `dict` consists of
+      the latest value of each StatisticalVariable and the place identifier.
+
+    Raises:
+      ValueError: If the payload returned by the Data Commons REST API is
+        malformed.
+
+    Examples:
+      >>> covariate_pd_input(["geoId/29", "geoId/33"], ["Count_Person", "Median_Income_Person"])
+          [
+            {'Count_Person': 20, 'Median_Income_Person': 40, 'place': 'geoId/29'},
+            {'Count_Person': 428, 'Median_Income_Person': 429, 'place': 'geoId/33'}
+          ]
+    """
+
+    try:
+        if isinstance(places, six.string_types):
+            places = [places]
+        else:
+            places = list(places)
+    except:
+        raise ValueError(
+            'Parameter `places` must be a string object or list-like object.')
+    try:
+        if isinstance(stat_vars, six.string_types):
+            stat_vars = [stat_vars]
+        else:
+            stat_vars = list(stat_vars)
+    except:
+        raise ValueError(
+            'Parameter `stat_vars` must be a string object or list-like object.'
+        )
+
+    rows_dict = _covariate_pd_input_options(places, stat_vars)
+    place2cov = collections.defaultdict({})  # {geo: {var1: 3, var2: 33}}
+    for stat_var, candidates_dict in rows_dict.items():
+        selected_rows = None
+        max_rows_so_far = 0
+        for svo, rows in candidates_dict.items():
+            current_geos = len(rows)
+            if current_geos > max_rows_so_far:
+                max_rows_so_far = current_geos
+                selected_rows = rows
+        for row in selected_rows:
+            place2cov[row['place']] = {stat_var: row['val']}
+    return [
+        dict({'place': place}, **covariates)
+        for place, covariates in place2cov.items()
+    ]

From 529aeb3eb205ff576faa089d487561f3e75c005e Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Sun, 23 Aug 2020 22:12:49 -0700
Subject: [PATCH 05/35] Add latest date sorting to covariate as well. Add test
 for covariate pd input function.

---
 datacommons/__init__.py            |   2 +-
 datacommons/stat_vars.py           |  39 ++++++++--
 datacommons/test/stat_vars_test.py | 113 ++++++++++++++++++++++-------
 3 files changed, 119 insertions(+), 35 deletions(-)

diff --git a/datacommons/__init__.py b/datacommons/__init__.py
index e8059dc2..b5c97e27 100644
--- a/datacommons/__init__.py
+++ b/datacommons/__init__.py
@@ -19,7 +19,7 @@
 from datacommons.core import get_property_labels, get_property_values, get_triples
 from datacommons.places import get_places_in, get_related_places, get_stats
 from datacommons.populations import get_populations, get_observations, get_pop_obs, get_place_obs
-from datacommons.stat_vars import get_stat_value, get_stat_series, get_stat_all, time_series_pd_input
+from datacommons.stat_vars import get_stat_value, get_stat_series, get_stat_all, time_series_pd_input, covariate_pd_input
 
 # Other utilities
 from .utils import set_api_key
diff --git a/datacommons/stat_vars.py b/datacommons/stat_vars.py
index d6d8fb4d..aadeb52f 100644
--- a/datacommons/stat_vars.py
+++ b/datacommons/stat_vars.py
@@ -335,7 +335,8 @@ def _covariate_pd_input_options(places, stat_vars):
                 continue
             for source_series in stat_var_data['sourceSeries']:
                 time_series = source_series['val']
-                latest_obs = time_series[max(time_series)]
+                latest_date = max(time_series)
+                latest_obs = time_series[latest_date]
                 # Hashable SVO options.
                 svo_options = (('measurementMethod',
                                 source_series.get('measurementMethod')),
@@ -346,6 +347,7 @@ def _covariate_pd_input_options(places, stat_vars):
                                 source_series.get('scalingFactor')))
                 res[stat_var][svo_options].append({
                     'place': place,
+                    'date': latest_date,
                     'val': latest_obs
                 })
     return {k: dict(v) for k, v in res.items()}
@@ -396,17 +398,40 @@ def covariate_pd_input(places, stat_vars):
         )
 
     rows_dict = _covariate_pd_input_options(places, stat_vars)
-    place2cov = collections.defaultdict({})  # {geo: {var1: 3, var2: 33}}
+    place2cov = collections.defaultdict(dict)  # {geo: {var1: 3, var2: 33}}
+
     for stat_var, candidates_dict in rows_dict.items():
         selected_rows = None
-        max_rows_so_far = 0
+        most_geos = []
+        max_geos_so_far = 0
+        latest_date = []
+        latest_date_so_far = ''
         for svo, rows in candidates_dict.items():
             current_geos = len(rows)
-            if current_geos > max_rows_so_far:
-                max_rows_so_far = current_geos
-                selected_rows = rows
+            if current_geos > max_geos_so_far:
+                max_geos_so_far = current_geos
+                most_geos = [svo]
+                # Reset tiebreaker stats. Recompute after this if-else block.
+                latest_date = []
+                latest_date_so_far = ''
+            elif current_geos == max_geos_so_far:
+                most_geos.append(svo)
+            else:
+                # Do not compute tiebreaker stats if not in most_geos.
+                continue
+            for row in rows:
+                row_date = row['date']
+                if row_date > latest_date_so_far:
+                    latest_date_so_far = row_date
+                    latest_date = [svo]
+                elif row_date == latest_date_so_far:
+                    latest_date.append(svo)
+        for svo in most_geos:
+            if svo in latest_date:
+                selected_rows = candidates_dict[svo]
+
         for row in selected_rows:
-            place2cov[row['place']] = {stat_var: row['val']}
+            place2cov[row['place']][stat_var] = row['val']
     return [
         dict({'place': place}, **covariates)
         for place, covariates in place2cov.items()
diff --git a/datacommons/test/stat_vars_test.py b/datacommons/test/stat_vars_test.py
index 72faed60..ef7f65a4 100644
--- a/datacommons/test/stat_vars_test.py
+++ b/datacommons/test/stat_vars_test.py
@@ -36,28 +36,35 @@
 CA_COUNT_PERSON = {
     "isDcAggregate":
         "true",
-    "sourceSeries": [
-        {
-            "val": {
-                "1990": 23640,
-                "1991": 24100,
-                "1993": 25090,
-            },
-            "observationPeriod": "P1Y",
-            "importName": "WorldDevelopmentIndicators",
-            "provenanceDomain": "worldbank.org"
+    "sourceSeries": [{
+        "val": {
+            "1990": 23640,
+            "1991": 24100,
+            "1993": 25090,
         },
-        {
-            "val": {
-                "1790": 3929214,
-                "1800": 5308483,
-                "1810": 7239881,
-            },
-            "measurementMethod": "WikidataPopulation",
-            "importName": "WikidataPopulation",
-            "provenanceDomain": "wikidata.org"
+        "observationPeriod": "P1Y",
+        "importName": "WorldDevelopmentIndicators",
+        "provenanceDomain": "worldbank.org"
+    }, {
+        "val": {
+            "1790": 3929214,
+            "1800": 5308483,
+            "1810": 7239881,
+        },
+        "measurementMethod": "WikidataPopulation",
+        "importName": "WikidataPopulation",
+        "provenanceDomain": "wikidata.org"
+    }, {
+        "val": {
+            "1890": 28360,
+            "1891": 24910,
+            "1892": 25070,
         },
-    ]
+        "measurementMethod": "OECDRegionalStatistics",
+        "observationPeriod": "P1Y",
+        "importName": "OECDRegionalDemography",
+        "provenanceDomain": "oecd.org"
+    }]
 }
 
 CA_COUNT_PERSON_MALE = {
@@ -101,7 +108,7 @@
     }]
 }
 
-HU22_MEDIAN_AGE_PERSON = {
+CA_MEDIAN_AGE_PERSON = {
     "sourceSeries": [{
         "val": {
             "1990": 12,
@@ -205,7 +212,7 @@ def read(self):
                     "geoId/06": {
                         "statVarData": {
                             "Count_Person": CA_COUNT_PERSON,
-                            "Median_Age_Person": HU22_MEDIAN_AGE_PERSON
+                            "Median_Age_Person": CA_MEDIAN_AGE_PERSON
                         }
                     },
                     "nuts/HU22": {
@@ -273,6 +280,27 @@ def read(self):
             }
             return MockResponse(json.dumps(resp))
 
+        if (data['places'] == ['geoId/06', 'nuts/HU22'] and
+                data['stat_vars'] == ['Count_Person', 'Median_Age_Person']):
+            # Response returned when querying with above params.
+            # Median Age missing for HU22.
+            resp = {
+                "placeData": {
+                    "geoId/06": {
+                        "statVarData": {
+                            "Count_Person": CA_COUNT_PERSON,
+                            "Median_Age_Person": CA_MEDIAN_AGE_PERSON
+                        }
+                    },
+                    "nuts/HU22": {
+                        "statVarData": {
+                            "Count_Person": HU22_COUNT_PERSON,
+                            "Median_Age_Person": {}
+                        }
+                    }
+                }
+            }
+            return MockResponse(json.dumps(resp))
     # Otherwise, return an empty response and a 404.
     return urllib.error.HTTPError
 
@@ -350,7 +378,7 @@ def test_basic(self, urlopen):
         exp = {
             "geoId/06": {
                 "Count_Person": CA_COUNT_PERSON,
-                "Median_Age_Person": HU22_MEDIAN_AGE_PERSON
+                "Median_Age_Person": CA_MEDIAN_AGE_PERSON
             },
             "nuts/HU22": {
                 "Count_Person": HU22_COUNT_PERSON,
@@ -385,10 +413,15 @@ def test_basic(self, urlopen):
         rows = dc.time_series_pd_input(['geoId/06', 'nuts/HU22'],
                                        'Count_Person')
         exp = [{
-            "1990": 23640,
-            "1991": 24100,
-            "1993": 25090,
-            "place": "geoId/06"
+            '1890': 28360,
+            '1891': 24910,
+            '1892': 25070,
+            'place': 'geoId/06'
+        }, {
+            '1991': 2410,
+            '1990': 2360,
+            '1992': 2500,
+            'place': 'nuts/HU22'
         }]
         six.assertCountEqual(self, rows, exp)
 
@@ -405,5 +438,31 @@ def test_tolerate_place_string(self, urlopen):
         self.assertEqual(rows, exp)
 
 
+class TestPdCovariates(unittest.TestCase):
+    """Unit tests for covariate_pd_input."""
+
+    @patch('six.moves.urllib.request.urlopen', side_effect=request_mock)
+    def test_basic(self, urlopen):
+        """Calling covariate_pd_input with proper args."""
+        rows = dc.covariate_pd_input(['geoId/06', 'nuts/HU22'],
+                                     ['Count_Person', 'Median_Age_Person'])
+        exp = [{
+            "place": "geoId/06",
+            "Median_Age_Person": 24,
+            "Count_Person": 25070
+        }, {
+            "place": "nuts/HU22",
+            "Count_Person": 2500
+        }]
+        six.assertCountEqual(self, rows, exp)
+
+    @patch('six.moves.urllib.request.urlopen', side_effect=request_mock)
+    def test_tolerate_place_string(self, urlopen):
+        """Calling covariate_pd_input with single string place arg."""
+        rows = dc.covariate_pd_input(['geoId/06'], 'Count_Person')
+        exp = [{"place": "geoId/06", "Count_Person": 25090}]
+        self.assertEqual(rows, exp)
+
+
 if __name__ == '__main__':
     unittest.main()

From a7868e24150d76566c858d1d26afb7ef3e3a9c0c Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Sun, 23 Aug 2020 22:20:43 -0700
Subject: [PATCH 06/35] stat_vars_test: make response and expected response
 strings consistently double quoted.

---
 datacommons/test/stat_vars_test.py | 32 +++++++++++++++---------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/datacommons/test/stat_vars_test.py b/datacommons/test/stat_vars_test.py
index ef7f65a4..df02732f 100644
--- a/datacommons/test/stat_vars_test.py
+++ b/datacommons/test/stat_vars_test.py
@@ -146,37 +146,37 @@ def read(self):
     if req.get_full_url(
     ) == stat_value_url_base + '?place=geoId/06&stat_var=Count_Person':
         # Response returned when querying with basic args.
-        return MockResponse(json.dumps({'value': 123}))
+        return MockResponse(json.dumps({"value": 123}))
     if req.get_full_url(
     ) == stat_value_url_base + '?place=geoId/06&stat_var=Count_Person&date=2010':
         # Response returned when querying with observationDate.
-        return MockResponse(json.dumps({'value': 133}))
+        return MockResponse(json.dumps({"value": 133}))
     if (req.get_full_url() == stat_value_url_base +
             '?place=geoId/06&stat_var=Count_Person&' +
             'date=2010&measurement_method=CensusPEPSurvey&' +
             'observation_period=P1Y&unit=RealPeople&scaling_factor=100'):
         # Response returned when querying with above optional params.
-        return MockResponse(json.dumps({'value': 103}))
+        return MockResponse(json.dumps({"value": 103}))
 
     # Mock responses for urlopen requests to get_stat_series.
     if req.get_full_url(
     ) == stat_series_url_base + '?place=geoId/06&stat_var=Count_Person':
         # Response returned when querying with basic args.
-        return MockResponse(json.dumps({'series': {'2000': 1, '2001': 2}}))
+        return MockResponse(json.dumps({"series": {"2000": 1, "2001": 2}}))
     if (req.get_full_url() == stat_series_url_base +
             '?place=geoId/06&stat_var=Count_Person&' +
             'measurement_method=CensusPEPSurvey&observation_period=P1Y&' +
             'unit=RealPeople&scaling_factor=100'):
 
         # Response returned when querying with above optional params.
-        return MockResponse(json.dumps({'series': {'2000': 3, '2001': 42}}))
+        return MockResponse(json.dumps({"series": {"2000": 3, "2001": 42}}))
     if (req.get_full_url() == stat_series_url_base +
             '?place=geoId/06&stat_var=Count_Person&' +
             'measurement_method=DNE'):
 
         # Response returned when data not available for optional parameters.
         # /stat/series?place=geoId/06&stat_var=Count_Person&measurement_method=DNE
-        return MockResponse(json.dumps({'series': {}}))
+        return MockResponse(json.dumps({"series": {}}))
 
     # Mock responses for urlopen requests to get_stat_all.
     if req.get_full_url() == stat_all_url_base:
@@ -336,7 +336,7 @@ def test_basic(self, urlopen):
         """Calling get_stat_value with minimal and proper args."""
         # Call get_stat_series
         stats = dc.get_stat_series('geoId/06', 'Count_Person')
-        self.assertEqual(stats, {'2000': 1, '2001': 2})
+        self.assertEqual(stats, {"2000": 1, "2001": 2})
 
     @patch('six.moves.urllib.request.urlopen', side_effect=request_mock)
     def test_opt_args(self, urlopen):
@@ -345,7 +345,7 @@ def test_opt_args(self, urlopen):
         # Call get_stat_series with all optional args
         stats = dc.get_stat_series('geoId/06', 'Count_Person',
                                    'CensusPEPSurvey', 'P1Y', 'RealPeople', 100)
-        self.assertEqual(stats, {'2000': 3, '2001': 42})
+        self.assertEqual(stats, {"2000": 3, "2001": 42})
 
         # Call get_stat_series with non-satisfiable optional args
         stats = dc.get_stat_series('geoId/06', 'Count_Person', 'DNE')
@@ -413,15 +413,15 @@ def test_basic(self, urlopen):
         rows = dc.time_series_pd_input(['geoId/06', 'nuts/HU22'],
                                        'Count_Person')
         exp = [{
-            '1890': 28360,
-            '1891': 24910,
-            '1892': 25070,
-            'place': 'geoId/06'
+            "1890": 28360,
+            "1891": 24910,
+            "1892": 25070,
+            "place": "geoId/06"
         }, {
-            '1991': 2410,
-            '1990': 2360,
-            '1992': 2500,
-            'place': 'nuts/HU22'
+            "1991": 2410,
+            "1990": 2360,
+            "1992": 2500,
+            "place": "nuts/HU22"
         }]
         six.assertCountEqual(self, rows, exp)
 

From ea3c2ff705ec4aeebd359a75b79c20003576fc23 Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Sun, 23 Aug 2020 22:24:02 -0700
Subject: [PATCH 07/35] Add an example for covariate_pd_input

---
 datacommons/examples/stat_vars.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/datacommons/examples/stat_vars.py b/datacommons/examples/stat_vars.py
index 1128378e..6ee550f7 100644
--- a/datacommons/examples/stat_vars.py
+++ b/datacommons/examples/stat_vars.py
@@ -127,13 +127,20 @@ def call_str(pvs):
                         ['Median_Age_Person', 'Count_Person']))
 
     print(
-        'dc.cohort_time_series(["geoId/29", "geoId/33"], "CumulativeCount_MedicalConditionIncident_COVID_19_PatientDeceased")'
+        'dc.time_series_pd_input(["geoId/29", "geoId/33"], "CumulativeCount_MedicalConditionIncident_COVID_19_PatientDeceased")'
     )
     print('>>> ')
     pp.pprint(
-        dc.time_series_pd_input([
-            "geoId/29", "geoId/33"
-        ], "CumulativeCount_MedicalConditionIncident_COVID_19_PatientDeceased"))
+        dc.time_series_pd_input(["geoId/29", "geoId/33"],
+                                "Median_Income_Person"))
+
+    print(
+        "dc.covariate_pd_input(['geoId/06', 'country/FRA'], 'Median_Age_Person', 'Count_Person'])"
+    )
+    print('>>> ')
+    pp.pprint(
+        dc.covariate_pd_input(['geoId/06', 'country/FRA'],
+                              ['Median_Age_Person', 'Count_Person']))
 
 
 if __name__ == '__main__':

From ab3f755d50fb5a99451e712b81454029a8aaef5e Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Sun, 23 Aug 2020 22:31:22 -0700
Subject: [PATCH 08/35] Make stat_var examples quoting consistent.

---
 datacommons/examples/stat_vars.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/datacommons/examples/stat_vars.py b/datacommons/examples/stat_vars.py
index 6ee550f7..974a7817 100644
--- a/datacommons/examples/stat_vars.py
+++ b/datacommons/examples/stat_vars.py
@@ -25,16 +25,16 @@ def main():
     param_sets = [
         {
             'place': 'geoId/06085',
-            'stat_var': 'Count_Person',
+            'stat_var': "Count_Person",
         },
         {
             'place': 'geoId/06085',
-            'stat_var': 'Count_Person',
+            'stat_var': "Count_Person",
             'date': '2018',
         },
         {
             'place': 'geoId/06085',
-            'stat_var': 'Count_Person',
+            'stat_var': "Count_Person",
             'date': '2018',
             'measurement_method': 'CensusACS5yrSurvey',
         },
@@ -111,20 +111,20 @@ def call_str(pvs):
 
     pp = pprint.PrettyPrinter(indent=4)
     print(
-        "\nget_stat_all(['geoId/06085', 'country/FRA'], ['Median_Age_Person', 'Count_Person'])"
+        '\nget_stat_all(["geoId/06085", "country/FRA"], ["Median_Age_Person", "Count_Person"])'
     )
     print('>>> ')
     pp.pprint(
-        dc.get_stat_all(['geoId/06085', 'country/FRA'],
-                        ['Median_Age_Person', 'Count_Person']))
+        dc.get_stat_all(["geoId/06085", "country/FRA"],
+                        ["Median_Age_Person", "Count_Person"]))
 
     print(
-        "\nget_stat_all(['badPlaceId', 'country/FRA'], ['Median_Age_Person', 'Count_Person'])"
+        '\nget_stat_all(["badPlaceId", "country/FRA"], ["Median_Age_Person", "Count_Person"])'
     )
     print('>>> ')
     pp.pprint(
-        dc.get_stat_all(['badPlaceId', 'country/FRA'],
-                        ['Median_Age_Person', 'Count_Person']))
+        dc.get_stat_all(["badPlaceId", "country/FRA"],
+                        ["Median_Age_Person", "Count_Person"]))
 
     print(
         'dc.time_series_pd_input(["geoId/29", "geoId/33"], "CumulativeCount_MedicalConditionIncident_COVID_19_PatientDeceased")'
@@ -135,12 +135,12 @@ def call_str(pvs):
                                 "Median_Income_Person"))
 
     print(
-        "dc.covariate_pd_input(['geoId/06', 'country/FRA'], 'Median_Age_Person', 'Count_Person'])"
+        'dc.covariate_pd_input(["geoId/06", "country/FRA"], ["Median_Age_Person", "Count_Person"])'
     )
     print('>>> ')
     pp.pprint(
-        dc.covariate_pd_input(['geoId/06', 'country/FRA'],
-                              ['Median_Age_Person', 'Count_Person']))
+        dc.covariate_pd_input(["geoId/06", "country/FRA"],
+                              ["Median_Age_Person", "Count_Person"]))
 
 
 if __name__ == '__main__':

From e72ae4a17ef15829a9e9cdef69909cefd0f6987b Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Mon, 24 Aug 2020 02:10:46 -0700
Subject: [PATCH 09/35] Create dcpandas module that uses pandas natively.

---
 datacommons/__init__.py            |   2 +-
 datacommons/examples/stat_vars.py  |   2 +-
 datacommons/stat_vars.py           | 219 ---------------------
 datacommons/test/stat_vars_test.py |  60 ------
 dcpandas/CHANGELOG.md              |  21 ++
 dcpandas/README.md                 |  47 +++++
 dcpandas/__init__.py               |  16 ++
 dcpandas/df_builder.py             | 298 +++++++++++++++++++++++++++++
 dcpandas/examples/__init__.py      |  13 ++
 dcpandas/examples/df_builder.py    |  84 ++++++++
 dcpandas/setup.py                  |  59 ++++++
 dcpandas/stat_vars.py              |   1 +
 dcpandas/test/__init__.py          |  13 ++
 dcpandas/test/df_builder_test.py   | 286 +++++++++++++++++++++++++++
 dcpandas/utils.py                  |   1 +
 requirements.txt                   |   1 +
 setup.py                           |   3 +-
 17 files changed, 843 insertions(+), 283 deletions(-)
 create mode 100644 dcpandas/CHANGELOG.md
 create mode 100644 dcpandas/README.md
 create mode 100644 dcpandas/__init__.py
 create mode 100644 dcpandas/df_builder.py
 create mode 100644 dcpandas/examples/__init__.py
 create mode 100644 dcpandas/examples/df_builder.py
 create mode 100644 dcpandas/setup.py
 create mode 120000 dcpandas/stat_vars.py
 create mode 100644 dcpandas/test/__init__.py
 create mode 100644 dcpandas/test/df_builder_test.py
 create mode 120000 dcpandas/utils.py

diff --git a/datacommons/__init__.py b/datacommons/__init__.py
index b5c97e27..da2f9fc4 100644
--- a/datacommons/__init__.py
+++ b/datacommons/__init__.py
@@ -19,7 +19,7 @@
 from datacommons.core import get_property_labels, get_property_values, get_triples
 from datacommons.places import get_places_in, get_related_places, get_stats
 from datacommons.populations import get_populations, get_observations, get_pop_obs, get_place_obs
-from datacommons.stat_vars import get_stat_value, get_stat_series, get_stat_all, time_series_pd_input, covariate_pd_input
+from datacommons.stat_vars import get_stat_value, get_stat_series, get_stat_all
 
 # Other utilities
 from .utils import set_api_key
diff --git a/datacommons/examples/stat_vars.py b/datacommons/examples/stat_vars.py
index 974a7817..9b7c29b5 100644
--- a/datacommons/examples/stat_vars.py
+++ b/datacommons/examples/stat_vars.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Basic examples for StatisticalVariable-based param_set Commons API functions."""
+"""Basic examples for StatisticalVariable-based param_set Data Commons API functions."""
 
 from __future__ import absolute_import
 from __future__ import division
diff --git a/datacommons/stat_vars.py b/datacommons/stat_vars.py
index aadeb52f..64cf1759 100644
--- a/datacommons/stat_vars.py
+++ b/datacommons/stat_vars.py
@@ -217,222 +217,3 @@ def get_stat_all(places, stat_vars):
         for stat_var_dcid, stat_var in place['statVarData'].items():
             place_statvar_series[place_dcid][stat_var_dcid] = stat_var
     return dict(place_statvar_series)
-
-
-# Pandas Helpers
-# These functions are wrapper functions that create Python data structures
-# that are easily converted to Pandas DataFrames (and Series).
-
-# def _get_first_time_series(stat_var_data):
-#     """Helper function to return one time series."""
-#     return stat_var_data['sourceSeries'][0]['val']
-
-
-def _time_series_pd_input_options(places, stat_var):
-    """Returns a `dict` mapping StatVarObservation options to `list` of `dict` of time series for each Place.
-    """
-    res = collections.defaultdict(list)
-    stat_all = get_stat_all(places, [stat_var])
-    for place, place_data in stat_all.items():
-        if not place_data:
-            continue
-        stat_var_data = place_data[stat_var]
-        if not stat_var_data:
-            continue
-        for source_series in stat_var_data['sourceSeries']:
-            time_series = source_series['val']
-            # Hashable SVO options.
-            svo_options = (('measurementMethod',
-                            source_series.get('measurementMethod')),
-                           ('observationPeriod',
-                            source_series.get('observationPeriod')),
-                           ('unit', source_series.get('unit')),
-                           ('scalingFactor',
-                            source_series.get('scalingFactor')))
-            res[svo_options].append(dict({'place': place}, **time_series))
-    return dict(res)
-
-
-def time_series_pd_input(places, stat_var):
-    """Returns a `list` of `dict` per element of `places` based on the `stat_var`.
-
-    Data Commons will pick a set of StatVarObservation options that covers the
-    maximum number of queried places. Among ties, Data Commons selects an option
-    set with the latest Observation.
-
-    Args:
-      places (`str` or `iterable` of `str`): The dcids of Places to query for.
-      stat_var (`str`): The dcid of the StatisticalVariable.
-    Returns:
-      A `list` of `dict`, one per element of `places`. Each `dict` consists of
-      the time series and place identifier.
-
-    Raises:
-      ValueError: If the payload returned by the Data Commons REST API is
-        malformed.
-
-    Examples:
-      >>> time_series_pd_input(["geoId/29", "geoId/33"], "Count_Person")
-          [
-            {'2020-03-07': 20, '2020-03-08': 40, 'place': 'geoId/29'},
-            {'2020-08-21': 428, '2020-08-22': 429, 'place': 'geoId/33'}
-          ]
-    """
-    try:
-        if isinstance(places, six.string_types):
-            places = [places]
-        else:
-            places = list(places)
-    except:
-        raise ValueError(
-            'Parameter `places` must be a string object or list-like object.')
-    if not isinstance(stat_var, six.string_types):
-        raise ValueError('Parameter `stat_var` must be a string.')
-
-    rows_dict = _time_series_pd_input_options(places, stat_var)
-    most_geos = []
-    max_geos_so_far = 0
-    latest_date = []
-    latest_date_so_far = ''
-    for svo, rows in rows_dict.items():
-        current_geos = len(rows)
-        if current_geos > max_geos_so_far:
-            max_geos_so_far = current_geos
-            most_geos = [svo]
-            # Reset tiebreaker stats. Recompute after this if-else block.
-            latest_date = []
-            latest_date_so_far = ''
-        elif current_geos == max_geos_so_far:
-            most_geos.append(svo)
-        else:
-            # Do not compute tiebreaker stats if not in most_geos.
-            continue
-        for row in rows:
-            dates = set(row.keys())
-            dates.remove('place')
-            row_max_date = max(dates)
-            if row_max_date > latest_date_so_far:
-                latest_date_so_far = row_max_date
-                latest_date = [svo]
-            elif row_max_date == latest_date_so_far:
-                latest_date.append(svo)
-    for svo in most_geos:
-        if svo in latest_date:
-            return rows_dict[svo]
-
-
-def _covariate_pd_input_options(places, stat_vars):
-    """Returns a `dict` mapping each stat_var to a `dict` of StatVarObservation options to `list` of `dict` of latest Observations for each Place. Note that the `observationDate` may differ across
-    Places, even if 
-    """
-    res = collections.defaultdict(lambda: collections.defaultdict(list))
-    stat_all = get_stat_all(places, stat_vars)
-    for place, place_data in stat_all.items():
-        if not place_data:
-            continue
-        for stat_var, stat_var_data in place_data.items():
-            if not stat_var_data:
-                continue
-            for source_series in stat_var_data['sourceSeries']:
-                time_series = source_series['val']
-                latest_date = max(time_series)
-                latest_obs = time_series[latest_date]
-                # Hashable SVO options.
-                svo_options = (('measurementMethod',
-                                source_series.get('measurementMethod')),
-                               ('observationPeriod',
-                                source_series.get('observationPeriod')),
-                               ('unit', source_series.get('unit')),
-                               ('scalingFactor',
-                                source_series.get('scalingFactor')))
-                res[stat_var][svo_options].append({
-                    'place': place,
-                    'date': latest_date,
-                    'val': latest_obs
-                })
-    return {k: dict(v) for k, v in res.items()}
-
-
-def covariate_pd_input(places, stat_vars):
-    """Returns a `list` of `dict` per element of `places` based on the `stat_var`.
-
-    Data Commons will pick a set of StatVarObservation options that covers the
-    maximum number of queried places. Among ties, Data Commons selects an option
-    set with the latest Observation.
-
-    Args:
-      places (`str` or `iterable` of `str`): The dcids of Places to query for.
-      stat_var (`str`): The dcid of the StatisticalVariable.
-    Returns:
-      A `list` of `dict`, one per element of `places`. Each `dict` consists of
-      the time series and place identifier.
-
-    Raises:
-      ValueError: If the payload returned by the Data Commons REST API is
-        malformed.
-
-    Examples:
-      >>> covariate_pd_input(["geoId/29", "geoId/33"], ["Count_Person", "Median_Income_Person"])
-          [
-            {'Count_Person': 20, 'Median_Income_Person': 40, 'place': 'geoId/29'},
-            {'Count_Person': 428, 'Median_Income_Person': 429, 'place': 'geoId/33'}
-          ]
-    """
-
-    try:
-        if isinstance(places, six.string_types):
-            places = [places]
-        else:
-            places = list(places)
-    except:
-        raise ValueError(
-            'Parameter `places` must be a string object or list-like object.')
-    try:
-        if isinstance(stat_vars, six.string_types):
-            stat_vars = [stat_vars]
-        else:
-            stat_vars = list(stat_vars)
-    except:
-        raise ValueError(
-            'Parameter `stat_vars` must be a string object or list-like object.'
-        )
-
-    rows_dict = _covariate_pd_input_options(places, stat_vars)
-    place2cov = collections.defaultdict(dict)  # {geo: {var1: 3, var2: 33}}
-
-    for stat_var, candidates_dict in rows_dict.items():
-        selected_rows = None
-        most_geos = []
-        max_geos_so_far = 0
-        latest_date = []
-        latest_date_so_far = ''
-        for svo, rows in candidates_dict.items():
-            current_geos = len(rows)
-            if current_geos > max_geos_so_far:
-                max_geos_so_far = current_geos
-                most_geos = [svo]
-                # Reset tiebreaker stats. Recompute after this if-else block.
-                latest_date = []
-                latest_date_so_far = ''
-            elif current_geos == max_geos_so_far:
-                most_geos.append(svo)
-            else:
-                # Do not compute tiebreaker stats if not in most_geos.
-                continue
-            for row in rows:
-                row_date = row['date']
-                if row_date > latest_date_so_far:
-                    latest_date_so_far = row_date
-                    latest_date = [svo]
-                elif row_date == latest_date_so_far:
-                    latest_date.append(svo)
-        for svo in most_geos:
-            if svo in latest_date:
-                selected_rows = candidates_dict[svo]
-
-        for row in selected_rows:
-            place2cov[row['place']][stat_var] = row['val']
-    return [
-        dict({'place': place}, **covariates)
-        for place, covariates in place2cov.items()
-    ]
diff --git a/datacommons/test/stat_vars_test.py b/datacommons/test/stat_vars_test.py
index df02732f..60ef037a 100644
--- a/datacommons/test/stat_vars_test.py
+++ b/datacommons/test/stat_vars_test.py
@@ -404,65 +404,5 @@ def test_bad_dcids(self, urlopen):
         self.assertDictEqual(stats, exp)
 
 
-class TestPdTimeSeries(unittest.TestCase):
-    """Unit tests for time_series_pd_input."""
-
-    @patch('six.moves.urllib.request.urlopen', side_effect=request_mock)
-    def test_basic(self, urlopen):
-        """Calling time_series_pd_input with proper args."""
-        rows = dc.time_series_pd_input(['geoId/06', 'nuts/HU22'],
-                                       'Count_Person')
-        exp = [{
-            "1890": 28360,
-            "1891": 24910,
-            "1892": 25070,
-            "place": "geoId/06"
-        }, {
-            "1991": 2410,
-            "1990": 2360,
-            "1992": 2500,
-            "place": "nuts/HU22"
-        }]
-        six.assertCountEqual(self, rows, exp)
-
-    @patch('six.moves.urllib.request.urlopen', side_effect=request_mock)
-    def test_tolerate_place_string(self, urlopen):
-        """Calling time_series_pd_input with single string place arg."""
-        rows = dc.time_series_pd_input('geoId/06', 'Count_Person')
-        exp = [{
-            "1990": 23640,
-            "1991": 24100,
-            "1993": 25090,
-            "place": "geoId/06"
-        }]
-        self.assertEqual(rows, exp)
-
-
-class TestPdCovariates(unittest.TestCase):
-    """Unit tests for covariate_pd_input."""
-
-    @patch('six.moves.urllib.request.urlopen', side_effect=request_mock)
-    def test_basic(self, urlopen):
-        """Calling covariate_pd_input with proper args."""
-        rows = dc.covariate_pd_input(['geoId/06', 'nuts/HU22'],
-                                     ['Count_Person', 'Median_Age_Person'])
-        exp = [{
-            "place": "geoId/06",
-            "Median_Age_Person": 24,
-            "Count_Person": 25070
-        }, {
-            "place": "nuts/HU22",
-            "Count_Person": 2500
-        }]
-        six.assertCountEqual(self, rows, exp)
-
-    @patch('six.moves.urllib.request.urlopen', side_effect=request_mock)
-    def test_tolerate_place_string(self, urlopen):
-        """Calling covariate_pd_input with single string place arg."""
-        rows = dc.covariate_pd_input(['geoId/06'], 'Count_Person')
-        exp = [{"place": "geoId/06", "Count_Person": 25090}]
-        self.assertEqual(rows, exp)
-
-
 if __name__ == '__main__':
     unittest.main()
diff --git a/dcpandas/CHANGELOG.md b/dcpandas/CHANGELOG.md
new file mode 100644
index 00000000..5249b4f6
--- /dev/null
+++ b/dcpandas/CHANGELOG.md
@@ -0,0 +1,21 @@
+# Changelog
+
+## 0.0.1
+
+**Date** - 08/24/2020
+
+**Release Tag** - [pd0.0.1](https://github.com/datacommonsorg/api-python/releases/tag/pd0.0.1)
+
+**Release Status** - Current head of branch [`master`](https://github.com/datacommonsorg/api-python/tree/master)
+
+Added Pandas wrapper functions.
+
+-   `build_time_series` will construct a pd.Series for a given StatisticalVariable and Place, where dates are the index for the time series.
+-   `build_time_series_dataframe` will construct a pd.DataFrame for a given StatisticalVariable and a set of Places, where Places are the index and dates are the columns.
+-   `build_covariate_dataframe` will construct a covariate pd.DataFrame for a set of StatisticalVariables and a set of Places: with Places as index and StatisticalVariables as the columns. The values are the most recent values for the chosen StatVarObservation options.
+
+For multi-place functions, when a StatisticalVariable has multiple StatVarObservation options,
+Data Commons chooses a set of StatVarObservation options that covers the most geos. This
+ensures that the data fetched for a StatisticalVariable is comparable across places.
+When there is a tie, we select the StatVarObservation options set with the latest date
+for which data is available for any place.
diff --git a/dcpandas/README.md b/dcpandas/README.md
new file mode 100644
index 00000000..1b8c6158
--- /dev/null
+++ b/dcpandas/README.md
@@ -0,0 +1,47 @@
+# Data Commons Pandas API
+
+This is a Python library for creating Pandas objects with data in the
+Data Commons Graph.
+To get started, install this package from pip.
+
+    pip install datacommons-pandas
+
+Once the package is installed, import `dcpandas`.
+
+    import dcpandas as dcpd
+
+For more detail on getting started with the API, please visit our
+[API Overview](http://docs.datacommons.org/api/).
+
+After you're ready to use the API, you can refer to `dcpandas/examples` for
+examples on how to use this package to perform various tasks. More tutorials and
+documentation can be found at [tutorials](https://datacommons.org/colab)!
+
+## About Data Commons
+
+[Data Commons](https://datacommons.org/) is an open knowledge repository that
+provides a unified view across multiple public data sets and statistics. You can
+view what [datasets](https://datacommons.org/datasets) are currently ingested
+and browse the graph using our [browser](https://browser.datacommons.org/).
+
+## License
+
+Apache 2.0
+
+## Development
+
+Please follow the Development instructions from the root directory.
+
+## Release to PyPI
+
+- Update "VERSION" in setup.py
+- Update CHANGELOG.md for a new version
+- Upload a new package using steps for [generating distribution archives](https://packaging.python.org/tutorials/packaging-projects/#generating-distribution-archives) and [uploading the distribution archives](https://packaging.python.org/tutorials/packaging-projects/#uploading-the-distribution-archives)
+
+## Support
+
+For general questions or issues about the API, please open an issue on our
+[issues](https://github.com/datacommonsorg/api-python/issues) page. For all other
+questions, please send an email to `support@datacommons.org`.
+
+**Note** - This is not an officially supported Google product.
diff --git a/dcpandas/__init__.py b/dcpandas/__init__.py
new file mode 100644
index 00000000..baa42576
--- /dev/null
+++ b/dcpandas/__init__.py
@@ -0,0 +1,16 @@
+# Copyright 2017 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dcpandas.stat_vars import get_stat_value, get_stat_series, get_stat_all
+from dcpandas.df_builder import build_time_series, build_time_series_dataframe, build_covariate_dataframe
\ No newline at end of file
diff --git a/dcpandas/df_builder.py b/dcpandas/df_builder.py
new file mode 100644
index 00000000..432b148c
--- /dev/null
+++ b/dcpandas/df_builder.py
@@ -0,0 +1,298 @@
+# Copyright 2020 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Data Commons Pandas API DataFrame Builder Module.
+
+Provides functions for building pandas Series and DataFrames from StatisticalVariable data in the Data Commons Graph.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import pandas as pd
+import six
+
+import dcpandas.stat_vars as dc
+
+# Pandas Helpers
+# These functions are wrapper functions that create Python data structures
+# that are easily converted to Pandas DataFrames (and Series).
+
+
+def build_time_series(place, stat_var):
+    """Constructs a pandas Series with `dates` as the index and corresponding `stat_var` statistics as values.
+    
+    Args:
+      place (`str`): The dcid of Place to query for.
+      stat_var (`str`): The dcid of the StatisticalVariable.
+    Returns:
+      A pandas Series with dates as the index, and Observed statistics as values.
+    """
+    return pd.Series(dc.get_stat_series(place, stat_var))
+
+
+def _group_stat_all_by_obs_options(places, stat_vars, mode):
+    """Groups the result of `get_stat_all` by Observation options for time series.
+    
+    Args:
+      places (`str` or `iterable` of `str`): The dcids of Places to query for.
+      stat_vars (`Iterable` of `str`): The dcids of the StatisticalVariables.
+      mode (`str`): "series" to output time series grouped by Observation options, or
+        "covariates" to output latest Observations.
+    Returns:
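+      A `dict` mapping Observation options to a `list` of per-Place rows when `mode` is "series"; when `mode` is "covariates", a `dict` mapping each StatisticalVariable to such a `dict`.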
+
+    Raises:
+      ValueError: If the payload returned by the Data Commons REST API is
+        malformed.
+    """
+    kseries = "series"
+    kcov = "covariates"
+
+    if mode == kseries:
+        if len(stat_vars) != 1:
+            raise ValueError(
+                'When `mode=series`, only one StatisticalVariable for `stat_vars` is allowed.'
+            )
+        res = collections.defaultdict(list)
+    elif mode == kcov:
+        res = collections.defaultdict(lambda: collections.defaultdict(list))
+    else:
+        raise ValueError(
+            'Value of `mode` must be one of ("series", "covariates")')
+
+    stat_all = dc.get_stat_all(places, stat_vars)
+    for place, place_data in stat_all.items():
+        if not place_data:
+            continue
+        for stat_var, stat_var_data in place_data.items():
+            if not stat_var_data:
+                continue
+            for source_series in stat_var_data['sourceSeries']:
+                time_series = source_series['val']
+                # Create a hashable for Observation options.
+                obs_options = (('measurementMethod',
+                                source_series.get('measurementMethod')),
+                               ('observationPeriod',
+                                source_series.get('observationPeriod')),
+                               ('unit', source_series.get('unit')),
+                               ('scalingFactor',
+                                source_series.get('scalingFactor')))
+                if mode == kseries:
+                    res[obs_options].append(
+                        dict({'place': place}, **time_series))
+                elif mode == kcov:
+                    date = max(time_series)
+                    res[stat_var][obs_options].append({
+                        'place': place,
+                        'date': date,
+                        'val': time_series[date]
+                    })
+    if mode == kseries:
+        return dict(res)
+    elif mode == kcov:
+        return {k: dict(v) for k, v in res.items()}
+
+
+def _time_series_pd_input(places, stat_var):
+    """Returns a `list` of `dict` per element of `places` based on the `stat_var`.
+
+    Data Commons will pick a set of Observation options that covers the
+    maximum number of queried places. Among ties, Data Commons selects an option
+    set with the latest Observation.
+
+    Args:
+      places (`str` or `iterable` of `str`): The dcids of Places to query for.
+      stat_var (`str`): The dcid of the StatisticalVariable.
+    Returns:
+      A `list` of `dict`, one per element of `places`. Each `dict` consists of
+      the time series and place identifier.
+
+    Examples:
+      >>> _time_series_pd_input(["geoId/29", "geoId/33"], "Count_Person")
+          [
+            {'2020-03-07': 20, '2020-03-08': 40, 'place': 'geoId/29'},
+            {'2020-08-21': 428, '2020-08-22': 429, 'place': 'geoId/33'}
+          ]
+    """
+
+    rows_dict = _group_stat_all_by_obs_options(places, [stat_var], 'series')
+    most_geos = []
+    max_geos_so_far = 0
+    latest_date = []
+    latest_date_so_far = ''
+    for options, rows in rows_dict.items():
+        current_geos = len(rows)
+        if current_geos > max_geos_so_far:
+            max_geos_so_far = current_geos
+            most_geos = [options]
+            # Reset tiebreaker stats. Recompute after this if-else block.
+            latest_date = []
+            latest_date_so_far = ''
+        elif current_geos == max_geos_so_far:
+            most_geos.append(options)
+        else:
+            # Do not compute tiebreaker stats if not in most_geos.
+            continue
+        for row in rows:
+            dates = set(row.keys())
+            dates.remove('place')
+            row_max_date = max(dates)
+            if row_max_date > latest_date_so_far:
+                latest_date_so_far = row_max_date
+                latest_date = [options]
+            elif row_max_date == latest_date_so_far:
+                latest_date.append(options)
+    for options in most_geos:
+        if options in latest_date:
+            return rows_dict[options]
+
+
+def build_time_series_dataframe(places, stat_var, desc_col=False):
+    """Constructs a pandas DataFrame with `places` as the index and dates of the time series as the columns.
+
+    To ensure statistics are comparable across all Places, when multiple
+    StatVarObservations are available for Place and StatVar combos, Data
+    Commons selects the Observation options that covers the most Places, and breaks
+    ties using the Observation options that yield the latest Observation for any
+    Place.
+    
+    Args:
+      places (`str` or `iterable` of `str`): The dcids of Places to query for.
+      stat_var (`str`): The dcid of the StatisticalVariable.
+      desc_col: Whether to order columns in descending order.
+    Returns:
+      A pandas DataFrame with Place IDs as the index, and sorted dates as columns.
+    """
+    try:
+        if isinstance(places, six.string_types):
+            places = [places]
+        else:
+            places = list(places)
+            assert all(isinstance(place, six.string_types) for place in places)
+    except:
+        raise ValueError(
+            'Parameter `places` must be a string object or list-like object of string.'
+        )
+    if not isinstance(stat_var, six.string_types):
+        raise ValueError('Parameter `stat_var` must be a string.')
+
+    df = pd.DataFrame.from_records(_time_series_pd_input(places, stat_var))
+    df.set_index('place', inplace=True)
+    df.sort_index(inplace=True)
+    if desc_col:
+        return df[sorted(df.columns, reverse=desc_col)]
+    return df
+
+
+def _covariate_pd_input(places, stat_vars):
+    """Returns a `list` of `dict` per element of `places` based on the `stat_vars`.
+
+    Data Commons will pick a set of StatVarObservation options that covers the
+    maximum number of queried places. Among ties, Data Commons selects an option
+    set with the latest Observation.
+
+    Args:
+      places (`str` or `iterable` of `str`): The dcids of Places to query for.
+      stat_vars (`Iterable` of `str`): The dcids of the StatisticalVariables.
+    Returns:
+      A `list` of `dict`, one per element of `places`. Each `dict` consists of
+      the latest value of each StatisticalVariable and the place identifier.
+
+    Examples:
+      >>> _covariate_pd_input(["geoId/29", "geoId/33"], ["Count_Person", "Median_Income_Person"])
+          [
+            {'Count_Person': 20, 'Median_Income_Person': 40, 'place': 'geoId/29'},
+            {'Count_Person': 428, 'Median_Income_Person': 429, 'place': 'geoId/33'}
+          ]
+    """
+
+    rows_dict = _group_stat_all_by_obs_options(places, stat_vars, 'covariates')
+    place2cov = collections.defaultdict(dict)  # {geo: {var1: 3, var2: 33}}
+
+    for stat_var, candidates_dict in rows_dict.items():
+        selected_rows = None
+        most_geos = []
+        max_geos_so_far = 0
+        latest_date = []
+        latest_date_so_far = ''
+        for options, rows in candidates_dict.items():
+            current_geos = len(rows)
+            if current_geos > max_geos_so_far:
+                max_geos_so_far = current_geos
+                most_geos = [options]
+                # Reset tiebreaker stats. Recompute after this if-else block.
+                latest_date = []
+                latest_date_so_far = ''
+            elif current_geos == max_geos_so_far:
+                most_geos.append(options)
+            else:
+                # Do not compute tiebreaker stats if not in most_geos.
+                continue
+            for row in rows:
+                row_date = row['date']
+                if row_date > latest_date_so_far:
+                    latest_date_so_far = row_date
+                    latest_date = [options]
+                elif row_date == latest_date_so_far:
+                    latest_date.append(options)
+        for options in most_geos:
+            if options in latest_date:
+                selected_rows = candidates_dict[options]
+
+        for row in selected_rows:
+            place2cov[row['place']][stat_var] = row['val']
+    return [
+        dict({'place': place}, **covariates)
+        for place, covariates in place2cov.items()
+    ]
+
+
+def build_covariate_dataframe(places, stat_vars):
+    """Constructs a pandas DataFrame with `places` as the index and `stat_vars` as the columns.
+
+    To ensure statistics are comparable across all Places, when multiple
+    StatVarObservations are available for Place and StatVar combos, Data
+    Commons selects the Observation options that covers the most Places, and breaks
+    ties using the Observation options that yield the latest Observation for any
+    Place.
+    
+    Args:
+      places (`str` or `iterable` of `str`): The dcids of Places to query for.
+      stat_vars (`Iterable` of `str`): The dcids of the StatisticalVariables.
+    Returns:
+      A pandas DataFrame with Place IDs as the index and `stat_vars` as columns.
+    """
+    try:
+        if isinstance(places, six.string_types):
+            places = [places]
+        else:
+            places = list(places)
+            assert all(isinstance(place, six.string_types) for place in places)
+        if isinstance(stat_vars, six.string_types):
+            stat_vars = [stat_vars]
+        else:
+            stat_vars = list(stat_vars)
+            assert all(
+                isinstance(stat_var, six.string_types)
+                for stat_var in stat_vars)
+    except:
+        raise ValueError(
+            'Parameter `places` and `stat_vars` must be string object or list-like object.'
+        )
+    df = pd.DataFrame.from_records(_covariate_pd_input(places, stat_vars))
+    df.set_index('place', inplace=True)
+    df.sort_index(inplace=True)
+    return df
\ No newline at end of file
diff --git a/dcpandas/examples/__init__.py b/dcpandas/examples/__init__.py
new file mode 100644
index 00000000..2c79033c
--- /dev/null
+++ b/dcpandas/examples/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2020 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/dcpandas/examples/df_builder.py b/dcpandas/examples/df_builder.py
new file mode 100644
index 00000000..cf70fd30
--- /dev/null
+++ b/dcpandas/examples/df_builder.py
@@ -0,0 +1,84 @@
+# Copyright 2020 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Basic examples for building Pandas objects using the Data Commons Pandas API."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import dcpandas as dcpd
+
+
+def main():
+
+    print('\nBuild a pd.Series of time series for one variable and one place.')
+    print('dcpd.build_time_series("country/CAN", "Count_WildlandFireEvent")')
+    print('>>> ')
+    print(dcpd.build_time_series("country/CAN", "Count_WildlandFireEvent"))
+
+    print(
+        '\nBuild a DataFrame of time series for one variable in multiple places.'
+    )
+    print(
+        'dcpd.build_time_series_dataframe(["geoId/29", "geoId/33"], "Median_Income_Person")'
+    )
+    print('>>> ')
+    print(
+        dcpd.build_time_series_dataframe(
+            ["geoId/33", "geoId/29", "country/USA"], "Median_Income_Person"))
+    print(
+        'dcpd.build_time_series_dataframe(["geoId/29", "geoId/33"], "CumulativeCount_MedicalConditionIncident_COVID_19_PatientDeceased")'
+    )
+    print('>>> ')
+    print(
+        dcpd.build_time_series_dataframe(["country/USA"],
+                                         "Median_Income_Person",
+                                         desc_col=True))
+    print(
+        '\nBuild a DataFrame of latest observations for multiple variables in multiple places.'
+    )
+
+    print(
+        'dcpd.build_covariate_dataframe(["geoId/06", "country/FRA"], ["Median_Age_Person", "Count_Person"])'
+    )
+    print('>>> ')
+    print(
+        dcpd.build_covariate_dataframe(
+            ["geoId/06", "country/FRA"],
+            ["Median_Age_Person", "Count_Person", "Count_Household"]))
+
+    print('\n\nExpect 4 errors, starting HERE:')
+    try:
+        dcpd.build_time_series_dataframe(
+            ["geoId/33"], ["Median_Income_Person", "Count_Person"])
+    except ValueError as e:
+        print("Successfully errored on: ", e)
+    try:
+        dcpd.build_time_series_dataframe(24, ["Median_Income_Person"])
+    except ValueError as e:
+        print("Successfully errored on: ", e)
+    try:
+        dcpd.build_covariate_dataframe([3],
+                                       ["Median_Income_Person", "Count_Person"])
+    except ValueError as e:
+        print("Successfully errored on: ", e)
+    try:
+        dcpd.build_covariate_dataframe("country/USA", True)
+    except ValueError as e:
+        print("Successfully errored on: ", e)
+    print('until HERE.')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/dcpandas/setup.py b/dcpandas/setup.py
new file mode 100644
index 00000000..53e94314
--- /dev/null
+++ b/dcpandas/setup.py
@@ -0,0 +1,59 @@
+# Copyright 2017 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Install dcpandas."""
+from setuptools import setup, find_packages
+
+with open('README.md', 'r') as fh:
+    long_description = fh.read()
+
+# Package metadata.
+NAME = 'dcpandas'
+DESCRIPTION = 'A library to create Pandas objects using the Data Commons Python API.'
+URL = 'https://github.com/datacommonsorg/api-python'
+EMAIL = 'support@datacommons.org'
+AUTHOR = 'datacommons.org'
+REQUIRES_PYTHON = '>=2.7'
+VERSION = '0.0.1'
+
+REQUIRED = [
+    'six',
+    'pandas',
+]
+
+setup(
+    name=NAME,
+    version=VERSION,
+    description=DESCRIPTION,
+    long_description=long_description,
+    long_description_content_type='text/markdown',
+    author=AUTHOR,
+    author_email=EMAIL,
+    maintainer=AUTHOR,
+    maintainer_email=EMAIL,
+    python_requires=REQUIRES_PYTHON,
+    url=URL,
+    packages=find_packages(),
+    install_requires=REQUIRED,
+    include_package_data=True,
+    license='Apache 2.0',
+    classifiers=[
+        'Intended Audience :: Developers',
+        'License :: OSI Approved :: Apache Software License',
+        'Programming Language :: Python',
+        'Programming Language :: Python :: 2.7',
+        'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: Implementation :: CPython',
+        'Topic :: Software Development',
+    ],
+)
diff --git a/dcpandas/stat_vars.py b/dcpandas/stat_vars.py
new file mode 120000
index 00000000..ab7359b6
--- /dev/null
+++ b/dcpandas/stat_vars.py
@@ -0,0 +1 @@
+../datacommons/stat_vars.py
\ No newline at end of file
diff --git a/dcpandas/test/__init__.py b/dcpandas/test/__init__.py
new file mode 100644
index 00000000..2c79033c
--- /dev/null
+++ b/dcpandas/test/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2020 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/dcpandas/test/df_builder_test.py b/dcpandas/test/df_builder_test.py
new file mode 100644
index 00000000..a9fc9c65
--- /dev/null
+++ b/dcpandas/test/df_builder_test.py
@@ -0,0 +1,286 @@
+# Copyright 2020 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" Data Commons Python API unit tests.
+
+Unit tests for StatVar methods in the Data Commons Pandas API.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+try:
+    from unittest.mock import patch
+except ImportError:
+    from mock import patch
+
+import dcpandas.df_builder as dcpd
+import dcpandas.utils as utils
+import json
+import unittest
+import six
+import six.moves.urllib as urllib
+
+# Reusable parts of REST API /stat/all response.
+CA_COUNT_PERSON = {
+    "isDcAggregate":
+        "true",
+    "sourceSeries": [{
+        "val": {
+            "1990": 23640,
+            "1991": 24100,
+            "1993": 25090,
+        },
+        "observationPeriod": "P1Y",
+        "importName": "WorldDevelopmentIndicators",
+        "provenanceDomain": "worldbank.org"
+    }, {
+        "val": {
+            "1790": 3929214,
+            "1800": 5308483,
+            "1810": 7239881,
+        },
+        "measurementMethod": "WikidataPopulation",
+        "importName": "WikidataPopulation",
+        "provenanceDomain": "wikidata.org"
+    }, {
+        "val": {
+            "1890": 28360,
+            "1891": 24910,
+            "1892": 25070,
+        },
+        "measurementMethod": "OECDRegionalStatistics",
+        "observationPeriod": "P1Y",
+        "importName": "OECDRegionalDemography",
+        "provenanceDomain": "oecd.org"
+    }]
+}
+
+HU22_COUNT_PERSON = {
+    "sourceSeries": [{
+        "val": {
+            "1990": 2360,
+            "1991": 2410,
+            "1992": 2500,
+        },
+        "measurementMethod": "OECDRegionalStatistics",
+        "observationPeriod": "P1Y",
+        "importName": "OECDRegionalDemography",
+        "provenanceDomain": "oecd.org"
+    }]
+}
+
+CA_MEDIAN_AGE_PERSON = {
+    "sourceSeries": [{
+        "val": {
+            "1990": 12,
+            "1991": 24,
+            "1992": 24,
+        },
+        "measurementMethod": "WikidataPopulation",
+        "importName": "WikidataPopulation",
+        "provenanceDomain": "wikidata.org"
+    }]
+}
+
+
+def request_mock(*args, **kwargs):
+    """Mocks `urlopen` by returning canned responses for recognized requests."""
+
+    # Create the mock response object.
+    class MockResponse:
+
+        def __init__(self, json_data):
+            self.json_data = json_data
+
+        def read(self):
+            return self.json_data
+
+    req = args[0]
+
+    stat_value_url_base = utils._API_ROOT + utils._API_ENDPOINTS[
+        'get_stat_value']
+    stat_series_url_base = utils._API_ROOT + utils._API_ENDPOINTS[
+        'get_stat_series']
+    stat_all_url_base = utils._API_ROOT + utils._API_ENDPOINTS['get_stat_all']
+
+    # Mock responses for urlopen requests to get_stat_series.
+    if req.get_full_url(
+    ) == stat_series_url_base + '?place=geoId/06&stat_var=Count_Person':
+        # Response returned when querying with basic args.
+        return MockResponse(json.dumps({"series": {"2000": 1, "2001": 2}}))
+    if (req.get_full_url() == stat_series_url_base +
+            '?place=geoId/06&stat_var=Count_Person&' +
+            'measurement_method=CensusPEPSurvey&observation_period=P1Y&' +
+            'unit=RealPeople&scaling_factor=100'):
+
+        # Response returned when querying with above optional params.
+        return MockResponse(json.dumps({"series": {"2000": 3, "2001": 42}}))
+    if (req.get_full_url() == stat_series_url_base +
+            '?place=geoId/06&stat_var=Count_Person&' +
+            'measurement_method=DNE'):
+
+        # Response returned when data not available for optional parameters.
+        # /stat/series?place=geoId/06&stat_var=Count_Person&measurement_method=DNE
+        return MockResponse(json.dumps({"series": {}}))
+
+    # Mock responses for urlopen requests to get_stat_all.
+    if req.get_full_url() == stat_all_url_base:
+        data = json.loads(req.data)
+
+        if (data['places'] == ['geoId/06', 'nuts/HU22'] and
+                data['stat_vars'] == ['Count_Person', 'Median_Age_Person']):
+            # Response returned when querying with above params.
+            # Median Age missing for HU22.
+            resp = {
+                "placeData": {
+                    "geoId/06": {
+                        "statVarData": {
+                            "Count_Person": CA_COUNT_PERSON,
+                            "Median_Age_Person": CA_MEDIAN_AGE_PERSON
+                        }
+                    },
+                    "nuts/HU22": {
+                        "statVarData": {
+                            "Count_Person": HU22_COUNT_PERSON,
+                            "Median_Age_Person": {}
+                        }
+                    }
+                }
+            }
+            return MockResponse(json.dumps(resp))
+
+        if (data['places'] == ['geoId/06', 'nuts/HU22'] and
+                data['stat_vars'] == ['Count_Person']):
+            # Response returned when querying with above params.
+            resp = {
+                "placeData": {
+                    "geoId/06": {
+                        "statVarData": {
+                            "Count_Person": CA_COUNT_PERSON,
+                        }
+                    },
+                    "nuts/HU22": {
+                        "statVarData": {
+                            "Count_Person": HU22_COUNT_PERSON,
+                        }
+                    }
+                }
+            }
+            return MockResponse(json.dumps(resp))
+        print("??????????????????")
+        print(data)
+        print("??????????????????")
+
+        if (data['places'] == ['geoId/06'] and
+                data['stat_vars'] == ['Count_Person']):
+            # Response returned when querying with above params.
+            resp = {
+                "placeData": {
+                    "geoId/06": {
+                        "statVarData": {
+                            "Count_Person": CA_COUNT_PERSON,
+                        }
+                    }
+                }
+            }
+            return MockResponse(json.dumps(resp))
+
+        if (data['places'] == ['geoId/06', 'nuts/HU22'] and
+                data['stat_vars'] == ['Count_Person', 'Median_Age_Person']):
+            # Response returned when querying with above params.
+            # Median Age missing for HU22.
+            resp = {
+                "placeData": {
+                    "geoId/06": {
+                        "statVarData": {
+                            "Count_Person": CA_COUNT_PERSON,
+                            "Median_Age_Person": CA_MEDIAN_AGE_PERSON
+                        }
+                    },
+                    "nuts/HU22": {
+                        "statVarData": {
+                            "Count_Person": HU22_COUNT_PERSON,
+                            "Median_Age_Person": {}
+                        }
+                    }
+                }
+            }
+            return MockResponse(json.dumps(resp))
+    # Otherwise, no mock matched: return the HTTPError class itself (not an instance or a raised error).
+    return urllib.error.HTTPError
+
+
+class TestPdTimeSeries(unittest.TestCase):
+    """Unit tests for _time_series_pd_input."""
+
+    @patch('six.moves.urllib.request.urlopen', side_effect=request_mock)
+    def test_basic(self, urlopen):
+        """Calling _time_series_pd_input with proper args."""
+        rows = dcpd._time_series_pd_input(['geoId/06', 'nuts/HU22'],
+                                          'Count_Person')
+        exp = [{
+            "1890": 28360,
+            "1891": 24910,
+            "1892": 25070,
+            "place": "geoId/06"
+        }, {
+            "1991": 2410,
+            "1990": 2360,
+            "1992": 2500,
+            "place": "nuts/HU22"
+        }]
+        six.assertCountEqual(self, rows, exp)
+
+    @patch('six.moves.urllib.request.urlopen', side_effect=request_mock)
+    def test_one_place(self, urlopen):
+        """Calling _time_series_pd_input with single place."""
+        rows = dcpd._time_series_pd_input(['geoId/06'], 'Count_Person')
+        exp = [{
+            "1990": 23640,
+            "1991": 24100,
+            "1993": 25090,
+            "place": "geoId/06"
+        }]
+        self.assertEqual(rows, exp)
+
+
+class TestPdCovariates(unittest.TestCase):
+    """Unit tests for _covariate_pd_input."""
+
+    @patch('six.moves.urllib.request.urlopen', side_effect=request_mock)
+    def test_basic(self, urlopen):
+        """Calling _covariate_pd_input with proper args."""
+        rows = dcpd._covariate_pd_input(['geoId/06', 'nuts/HU22'],
+                                        ['Count_Person', 'Median_Age_Person'])
+        exp = [{
+            "place": "geoId/06",
+            "Median_Age_Person": 24,
+            "Count_Person": 25070
+        }, {
+            "place": "nuts/HU22",
+            "Count_Person": 2500
+        }]
+        six.assertCountEqual(self, rows, exp)
+
+    @patch('six.moves.urllib.request.urlopen', side_effect=request_mock)
+    def test_one_each(self, urlopen):
+        """Calling _covariate_pd_input with single place and var."""
+        rows = dcpd._covariate_pd_input(['geoId/06'], ['Count_Person'])
+        exp = [{"place": "geoId/06", "Count_Person": 25090}]
+        self.assertEqual(rows, exp)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/dcpandas/utils.py b/dcpandas/utils.py
new file mode 120000
index 00000000..06c545f5
--- /dev/null
+++ b/dcpandas/utils.py
@@ -0,0 +1 @@
+../datacommons/utils.py
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 0141b5f1..35bbdfec 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 six
 pytest
 mock
+pandas
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 7907027e..d8eaf315 100644
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Install datacommons."""
 from setuptools import setup
 
@@ -25,7 +24,7 @@
 EMAIL = 'support@datacommons.org'
 AUTHOR = 'datacommons.org'
 REQUIRES_PYTHON = '>=2.7'
-VERSION = '1.3.0'
+VERSION = '1.4.0'
 
 REQUIRED = [
     'six',

From 32a028486c8b4b4d1388b96b16a41c8572a4a06c Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Mon, 24 Aug 2020 02:16:06 -0700
Subject: [PATCH 10/35] Do the python release in another PR.

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index d8eaf315..68d1016d 100644
--- a/setup.py
+++ b/setup.py
@@ -24,7 +24,7 @@
 EMAIL = 'support@datacommons.org'
 AUTHOR = 'datacommons.org'
 REQUIRES_PYTHON = '>=2.7'
-VERSION = '1.4.0'
+VERSION = '1.3.0'
 
 REQUIRED = [
     'six',

From 160eee6f346e2ea2d500b73787e748a02ea915a3 Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Mon, 24 Aug 2020 02:16:20 -0700
Subject: [PATCH 11/35] Remove stale refs in datacommons library to pandas
 features.

---
 datacommons/examples/stat_vars.py | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/datacommons/examples/stat_vars.py b/datacommons/examples/stat_vars.py
index 9b7c29b5..8ec77a1b 100644
--- a/datacommons/examples/stat_vars.py
+++ b/datacommons/examples/stat_vars.py
@@ -126,22 +126,6 @@ def call_str(pvs):
         dc.get_stat_all(["badPlaceId", "country/FRA"],
                         ["Median_Age_Person", "Count_Person"]))
 
-    print(
-        'dc.time_series_pd_input(["geoId/29", "geoId/33"], "CumulativeCount_MedicalConditionIncident_COVID_19_PatientDeceased")'
-    )
-    print('>>> ')
-    pp.pprint(
-        dc.time_series_pd_input(["geoId/29", "geoId/33"],
-                                "Median_Income_Person"))
-
-    print(
-        'dc.covariate_pd_input(["geoId/06", "country/FRA"], ["Median_Age_Person", "Count_Person"])'
-    )
-    print('>>> ')
-    pp.pprint(
-        dc.covariate_pd_input(["geoId/06", "country/FRA"],
-                              ["Median_Age_Person", "Count_Person"]))
-
 
 if __name__ == '__main__':
     main()

From 771b0a5fb9f1bb98df7d3c12f9fbb3063d9cf1f0 Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Mon, 24 Aug 2020 02:18:07 -0700
Subject: [PATCH 12/35] Update pandas readme.

---
 dcpandas/README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/dcpandas/README.md b/dcpandas/README.md
index 1b8c6158..fb994f97 100644
--- a/dcpandas/README.md
+++ b/dcpandas/README.md
@@ -4,16 +4,16 @@ This is a Python library for creating Pandas objects with data in the
 Data Commons Graph.
 To get started, install this package from pip.
 
-    pip install datacommons-pandas
+    pip install dcpandas
 
-Once the package is installed, import `datacommons-pandas`.
+Once the package is installed, import `dcpandas`.
 
-    import datacommons-pandas as dcpd
+    import dcpandas as dcpd
 
 For more detail on getting started with the API, please visit our
 [API Overview](http://docs.datacommons.org/api/).
 
-After you're ready to use the API, you can refer to `datacommons-pandas/examples` for
+After you're ready to use the API, you can refer to `dcpandas/examples` for
 examples on how to use this package to perform various tasks. More tutorials and
 documentation can be found at [tutorials](https://datacommons.org/colab)!
 

From 5a86466e0a746d6ec3ea32eb03121e06e8decb6a Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Mon, 24 Aug 2020 02:19:45 -0700
Subject: [PATCH 13/35] Cleanup format.

---
 dcpandas/__init__.py             | 2 +-
 dcpandas/test/df_builder_test.py | 3 ---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/dcpandas/__init__.py b/dcpandas/__init__.py
index baa42576..fe8f6bb8 100644
--- a/dcpandas/__init__.py
+++ b/dcpandas/__init__.py
@@ -13,4 +13,4 @@
 # limitations under the License.
 
 from dcpandas.stat_vars import get_stat_value, get_stat_series, get_stat_all
-from dcpandas.df_builder import build_time_series, build_time_series_dataframe, build_covariate_dataframe
\ No newline at end of file
+from dcpandas.df_builder import build_time_series, build_time_series_dataframe, build_covariate_dataframe
diff --git a/dcpandas/test/df_builder_test.py b/dcpandas/test/df_builder_test.py
index a9fc9c65..20e3ab76 100644
--- a/dcpandas/test/df_builder_test.py
+++ b/dcpandas/test/df_builder_test.py
@@ -179,9 +179,6 @@ def read(self):
                 }
             }
             return MockResponse(json.dumps(resp))
-        print("??????????????????")
-        print(data)
-        print("??????????????????")
 
         if (data['places'] == ['geoId/06'] and
                 data['stat_vars'] == ['Count_Person']):

From 5780970d7101477b55b476a57fdd86076359d8be Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Mon, 24 Aug 2020 02:47:00 -0700
Subject: [PATCH 14/35] Remove pd-related mocks from python testing.

---
 datacommons/test/stat_vars_test.py | 54 ------------------------------
 1 file changed, 54 deletions(-)

diff --git a/datacommons/test/stat_vars_test.py b/datacommons/test/stat_vars_test.py
index 60ef037a..613674a6 100644
--- a/datacommons/test/stat_vars_test.py
+++ b/datacommons/test/stat_vars_test.py
@@ -225,39 +225,6 @@ def read(self):
             }
             return MockResponse(json.dumps(resp))
 
-        if (data['places'] == ['geoId/06', 'nuts/HU22'] and
-                data['stat_vars'] == ['Count_Person']):
-            # Response returned when querying with above params.
-            resp = {
-                "placeData": {
-                    "geoId/06": {
-                        "statVarData": {
-                            "Count_Person": CA_COUNT_PERSON,
-                        }
-                    },
-                    "nuts/HU22": {
-                        "statVarData": {
-                            "Count_Person": HU22_COUNT_PERSON,
-                        }
-                    }
-                }
-            }
-            return MockResponse(json.dumps(resp))
-
-        if (data['places'] == ['geoId/06'] and
-                data['stat_vars'] == ['Count_Person']):
-            # Response returned when querying with above params.
-            resp = {
-                "placeData": {
-                    "geoId/06": {
-                        "statVarData": {
-                            "Count_Person": CA_COUNT_PERSON,
-                        }
-                    }
-                }
-            }
-            return MockResponse(json.dumps(resp))
-
         if (data['places'] == ['badPlaceId', 'nuts/HU22'] and
                 data['stat_vars'] == ['Count_Person', 'badStatVarId']):
             # Response returned when querying with above params.
@@ -280,27 +247,6 @@ def read(self):
             }
             return MockResponse(json.dumps(resp))
 
-        if (data['places'] == ['geoId/06', 'nuts/HU22'] and
-                data['stat_vars'] == ['Count_Person', 'Median_Age_Person']):
-            # Response returned when querying with above params.
-            # Median Age missing for HU22.
-            resp = {
-                "placeData": {
-                    "geoId/06": {
-                        "statVarData": {
-                            "Count_Person": CA_COUNT_PERSON,
-                            "Median_Age_Person": CA_MEDIAN_AGE_PERSON
-                        }
-                    },
-                    "nuts/HU22": {
-                        "statVarData": {
-                            "Count_Person": HU22_COUNT_PERSON,
-                            "Median_Age_Person": {}
-                        }
-                    }
-                }
-            }
-            return MockResponse(json.dumps(resp))
     # Otherwise, return an empty response and a 404.
     return urllib.error.HTTPError
 

From cb83487bf73f2e992cac925414798c101b3a762f Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Mon, 24 Aug 2020 02:59:03 -0700
Subject: [PATCH 15/35] Cosmetics.

---
 dcpandas/CHANGELOG.md           |  4 ++--
 dcpandas/README.md              |  2 +-
 dcpandas/__init__.py            |  2 +-
 dcpandas/df_builder.py          | 10 +++-------
 dcpandas/examples/df_builder.py |  2 +-
 dcpandas/setup.py               |  2 +-
 6 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/dcpandas/CHANGELOG.md b/dcpandas/CHANGELOG.md
index 5249b4f6..750ab4b9 100644
--- a/dcpandas/CHANGELOG.md
+++ b/dcpandas/CHANGELOG.md
@@ -1,6 +1,6 @@
 # Changelog
 
-## 0.01
+## 0.0.1
 
 **Date** - 08/24/2020
 
@@ -8,7 +8,7 @@
 
 **Release Status** - Current head of branch [`master`](https://github.com/datacommonsorg/api-python/tree/master)
 
-Added Pandas wrapper functions.
+Added pandas wrapper functions.
 
 -   `build_time_series` will construct a pd.Series for a given StatisticalVariable and Place, where dates are the index for the time series.
 -   `build_time_series_dataframe` will construct a pd.DataFrame for a given StatisticalVariable and a set of Places: where Places are the index and date are the columns.
diff --git a/dcpandas/README.md b/dcpandas/README.md
index fb994f97..b3daafc3 100644
--- a/dcpandas/README.md
+++ b/dcpandas/README.md
@@ -1,6 +1,6 @@
 # Data Commons Pandas API
 
-This is a Python library for creating Pandas objects with data in the
+This is a Python library for creating pandas objects with data in the
 Data Commons Graph.
 To get started, install this package from pip.
 
diff --git a/dcpandas/__init__.py b/dcpandas/__init__.py
index fe8f6bb8..4a34cabb 100644
--- a/dcpandas/__init__.py
+++ b/dcpandas/__init__.py
@@ -1,4 +1,4 @@
-# Copyright 2017 Google Inc.
+# Copyright 2020 Google Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/dcpandas/df_builder.py b/dcpandas/df_builder.py
index 432b148c..fbe9ddd0 100644
--- a/dcpandas/df_builder.py
+++ b/dcpandas/df_builder.py
@@ -11,9 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Data Commons Python API Stat Module.
+"""Data Commons Pandas API DataFrame Builder Module.
 
-Provides functions for getting data on StatisticalVariables from Data Commons Graph.
+Provides functions for building pandas DataFrames using the Data Commons Graph.
 """
 
 from __future__ import absolute_import
@@ -26,10 +26,6 @@
 
 import dcpandas.stat_vars as dc
 
-# Pandas Helpers
-# These functions are wrapper functions that create Python data structures
-# that are easily converted to Pandas DataFrames (and Series).
-
 
 def build_time_series(place, stat_var):
     """Constructs a pandas Series with `dates` as the index and corresponding `stat_var` statistics as values.
@@ -295,4 +291,4 @@ def build_covariate_dataframe(places, stat_vars):
     df = pd.DataFrame.from_records(_covariate_pd_input(places, stat_vars))
     df.set_index('place', inplace=True)
     df.sort_index(inplace=True)
-    return df
\ No newline at end of file
+    return df
diff --git a/dcpandas/examples/df_builder.py b/dcpandas/examples/df_builder.py
index cf70fd30..ab72ae73 100644
--- a/dcpandas/examples/df_builder.py
+++ b/dcpandas/examples/df_builder.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Basic examples for building Pandas objects using the Data Commons Pandas API."""
+"""Basic examples for building pandas objects using the Data Commons Pandas API."""
 
 from __future__ import absolute_import
 from __future__ import division
diff --git a/dcpandas/setup.py b/dcpandas/setup.py
index 53e94314..14e2dc70 100644
--- a/dcpandas/setup.py
+++ b/dcpandas/setup.py
@@ -19,7 +19,7 @@
 
 # Package metadata.
 NAME = 'dcpandas'
-DESCRIPTION = 'A library to create Pandas objects using the Data Commons Python API.'
+DESCRIPTION = 'A library to create pandas objects using the Data Commons Python API.'
 URL = 'https://github.com/datacommonsorg/api-python'
 EMAIL = 'support@datacommons.org'
 AUTHOR = 'datacommons.org'

From 85b3a9b83df67598cd3e75de7a159513b583fdfa Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Mon, 24 Aug 2020 03:28:20 -0700
Subject: [PATCH 16/35] Update docstring

---
 dcpandas/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dcpandas/setup.py b/dcpandas/setup.py
index 14e2dc70..fd94ca0e 100644
--- a/dcpandas/setup.py
+++ b/dcpandas/setup.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Install datacommons."""
+"""Install dcpandas."""
 from setuptools import setup, find_packages
 
 with open('README.md', 'r') as fh:

From 4044c0359dc5223ce888e1b965dd078afd2024ce Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Mon, 24 Aug 2020 04:19:04 -0700
Subject: [PATCH 17/35] Fix import statement for pip. Always sort time series
 df columns.

---
 dcpandas/df_builder.py |  6 ++--
 setup_dcpandas.py      | 62 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+), 4 deletions(-)
 create mode 100644 setup_dcpandas.py

diff --git a/dcpandas/df_builder.py b/dcpandas/df_builder.py
index fbe9ddd0..e0317f91 100644
--- a/dcpandas/df_builder.py
+++ b/dcpandas/df_builder.py
@@ -24,7 +24,7 @@
 import pandas as pd
 import six
 
-import dcpandas.stat_vars as dc
+import datacommons.stat_vars as dc
 
 
 def build_time_series(place, stat_var):
@@ -188,9 +188,7 @@ def build_time_series_dataframe(places, stat_var, desc_col=False):
     df = pd.DataFrame.from_records(_time_series_pd_input(places, stat_var))
     df.set_index('place', inplace=True)
     df.sort_index(inplace=True)
-    if desc_col:
-        return df[sorted(df.columns, reverse=desc_col)]
-    return df
+    return df[sorted(df.columns, reverse=desc_col)]
 
 
 def _covariate_pd_input(places, stat_vars):
diff --git a/setup_dcpandas.py b/setup_dcpandas.py
new file mode 100644
index 00000000..a73fb3c8
--- /dev/null
+++ b/setup_dcpandas.py
@@ -0,0 +1,62 @@
+# Copyright 2017 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Install dcpandas."""
+from setuptools import setup, find_packages
+
+with open('README.md', 'r') as fh:
+    long_description = fh.read()
+
+# Package metadata.
+NAME = 'dcpandas_tjann'
+DESCRIPTION = 'A library to create pandas objects using the Data Commons Python API.'
+URL = 'https://github.com/datacommonsorg/api-python'
+EMAIL = 'support@datacommons.org'
+AUTHOR = 'datacommons.org'
+REQUIRES_PYTHON = '>=2.7'
+VERSION = '0.0.4'
+
+REQUIRED = [
+    'six',
+    'pandas',
+]
+
+PACKAGES = ['dcpandas']
+PACKAGE_DIR = {'dcpandas': 'dcpandas'}
+setup(
+    name=NAME,
+    version=VERSION,
+    description=DESCRIPTION,
+    long_description=long_description,
+    long_description_content_type='text/markdown',
+    author=AUTHOR,
+    author_email=EMAIL,
+    maintainer=AUTHOR,
+    maintainer_email=EMAIL,
+    python_requires=REQUIRES_PYTHON,
+    url=URL,
+    packages=PACKAGES,
+    package_dir=PACKAGE_DIR,
+    install_requires=REQUIRED,
+    include_package_data=True,
+    license='Apache 2.0',
+    classifiers=[
+        'Intended Audience :: Developers',
+        'License :: OSI Approved :: Apache Software License',
+        'Programming Language :: Python',
+        'Programming Language :: Python :: 2.7',
+        'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: Implementation :: CPython',
+        'Topic :: Software Development',
+    ],
+)

From d6290beabccf8b084749fbf964541c13cbfc1cae Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Mon, 24 Aug 2020 04:21:11 -0700
Subject: [PATCH 18/35] Restore pandas setup to prepare for release.

---
 setup_dcpandas.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup_dcpandas.py b/setup_dcpandas.py
index a73fb3c8..14dbbe33 100644
--- a/setup_dcpandas.py
+++ b/setup_dcpandas.py
@@ -18,13 +18,13 @@
     long_description = fh.read()
 
 # Package metadata.
-NAME = 'dcpandas_tjann'
+NAME = 'dcpandas'
 DESCRIPTION = 'A library to create pandas objects using the Data Commons Python API.'
 URL = 'https://github.com/datacommonsorg/api-python'
 EMAIL = 'support@datacommons.org'
 AUTHOR = 'datacommons.org'
 REQUIRES_PYTHON = '>=2.7'
-VERSION = '0.0.4'
+VERSION = '0.0.1'
 
 REQUIRED = [
     'six',

From 4bba808c4959d3548319fe28db176410b78834d4 Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Mon, 24 Aug 2020 13:40:19 -0700
Subject: [PATCH 19/35] change _group_stat_all_by_obs_options mode parameter to
 time_series boolean.

---
 dcpandas/df_builder.py | 34 +++++++++++++++-------------------
 1 file changed, 15 insertions(+), 19 deletions(-)

diff --git a/dcpandas/df_builder.py b/dcpandas/df_builder.py
index e0317f91..64ef8395 100644
--- a/dcpandas/df_builder.py
+++ b/dcpandas/df_builder.py
@@ -39,14 +39,13 @@ def build_time_series(place, stat_var):
     return pd.Series(dc.get_stat_series(place, stat_var))
 
 
-def _group_stat_all_by_obs_options(places, stat_vars, mode):
-    """Groups the result of `get_stat_all` by Observation options for time series.
+def _group_stat_all_by_obs_options(places, stat_vars, time_series=True):
+    """Groups the result of `get_stat_all` by Observation options for time series or covariates.
     
     Args:
       places (`str` or `iterable` of `str`): The dcids of Places to query for.
       stat_vars (`Iterable` of `str`): The dcids of the StatisticalVariables.
-      mode (`str`): "series" to output time series grouped by Observation options, or
-        "covariates" to output latest Observations.
+      mode (`boolean`): if True, output time series grouped by Observation options; if False, output latest Observation grouped by Observation options.
     Returns:
       A pandas Series with Place IDs as the index, and Observed statistics as values.
 
@@ -54,20 +53,14 @@ def _group_stat_all_by_obs_options(places, stat_vars, mode):
       ValueError: If the payload returned by the Data Commons REST API is
         malformed.
     """
-    kseries = "series"
-    kcov = "covariates"
-
-    if mode == kseries:
+    if time_series:
         if len(stat_vars) != 1:
             raise ValueError(
-                'When `mode=series`, only one StatisticalVariable for `stat_vars` is allowed.'
+                'When `time_series` is set, only one StatisticalVariable for `stat_vars` is allowed.'
             )
         res = collections.defaultdict(list)
-    elif mode == kcov:
-        res = collections.defaultdict(lambda: collections.defaultdict(list))
     else:
-        raise ValueError(
-            'Value of `mode` must be one of ("series", "covariates")')
+        res = collections.defaultdict(lambda: collections.defaultdict(list))
 
     stat_all = dc.get_stat_all(places, stat_vars)
     for place, place_data in stat_all.items():
@@ -86,19 +79,19 @@ def _group_stat_all_by_obs_options(places, stat_vars, mode):
                                ('unit', source_series.get('unit')),
                                ('scalingFactor',
                                 source_series.get('scalingFactor')))
-                if mode == kseries:
+                if time_series:
                     res[obs_options].append(
                         dict({'place': place}, **time_series))
-                elif mode == kcov:
+                else:
                     date = max(time_series)
                     res[stat_var][obs_options].append({
                         'place': place,
                         'date': date,
                         'val': time_series[date]
                     })
-    if mode == kseries:
+    if time_series:
         return dict(res)
-    elif mode == kcov:
+    else:
         return {k: dict(v) for k, v in res.items()}
 
 
@@ -124,7 +117,8 @@ def _time_series_pd_input(places, stat_var):
           ]
     """
 
-    rows_dict = _group_stat_all_by_obs_options(places, [stat_var], 'series')
+    rows_dict = _group_stat_all_by_obs_options(places, [stat_var],
+                                               time_series=True)
     most_geos = []
     max_geos_so_far = 0
     latest_date = []
@@ -213,7 +207,9 @@ def _covariate_pd_input(places, stat_vars):
           ]
     """
 
-    rows_dict = _group_stat_all_by_obs_options(places, stat_vars, 'covariates')
+    rows_dict = _group_stat_all_by_obs_options(places,
+                                               stat_vars,
+                                               time_series=False)
     place2cov = collections.defaultdict(dict)  # {geo: {var1: 3, var2: 33}}
 
     for stat_var, candidates_dict in rows_dict.items():

From c81eaa67fb6265c00c3793505777f3e5ce02fafa Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Mon, 24 Aug 2020 14:18:19 -0700
Subject: [PATCH 20/35] Address some documentation suggestions from cyin.

---
 README.md             | 4 ++--
 dcpandas/CHANGELOG.md | 4 ++--
 dcpandas/README.md    | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 8a93ce8e..ba65c8e6 100644
--- a/README.md
+++ b/README.md
@@ -21,9 +21,9 @@ understanding API usage.
 For more detail on getting started with the API, please visit our
 [API Overview](http://docs.datacommons.org/api/).
 
-After you're ready to use the API, you can refer to `datacommons/examples` for
+When you are ready to use the API, you can refer to `datacommons/examples` for
 examples on how to use this package to perform various tasks. More tutorials and
-documentation can be found at [tutorials](https://datacommons.org/colab)!
+documentation can be found on our [tutorials page](https://datacommons.org/colab)!
 
 ## About Data Commons
 
diff --git a/dcpandas/CHANGELOG.md b/dcpandas/CHANGELOG.md
index 750ab4b9..068f599a 100644
--- a/dcpandas/CHANGELOG.md
+++ b/dcpandas/CHANGELOG.md
@@ -11,8 +11,8 @@
 Added pandas wrapper functions.
 
 -   `build_time_series` will construct a pd.Series for a given StatisticalVariable and Place, where dates are the index for the time series.
--   `build_time_series_dataframe` will construct a pd.DataFrame for a given StatisticalVariable and a set of Places: where Places are the index and date are the columns.
--   `build_covariate_dataframe` will construct a covariate pd.DataFrame for a set of StatisticalVariables and a set of Places: with Places as index and StatisticalVariables as the columns. The values are the most recent values for the chosen StatVarObservation options.
+-   `build_time_series_dataframe` will construct a pd.DataFrame for a given StatisticalVariable and a set of Places. The DataFrame will have Places as the index and dates as the columns.
+-   `build_covariate_dataframe` will construct a covariate pd.DataFrame for a set of StatisticalVariables and a set of Places. The DataFrame will have Places as the index and StatisticalVariables as the columns. The values are the most recent values for the chosen StatVarObservation options.
 
 For multi-place functions, when a StatisticalVariable has multiple StatVarObservation options,
 Data Commons chooses a set of StatVarObservation options that covers the most geos. This
diff --git a/dcpandas/README.md b/dcpandas/README.md
index b3daafc3..779e247f 100644
--- a/dcpandas/README.md
+++ b/dcpandas/README.md
@@ -13,9 +13,9 @@ Once the package is installed, import `dcpandas`.
 For more detail on getting started with the API, please visit our
 [API Overview](http://docs.datacommons.org/api/).
 
-After you're ready to use the API, you can refer to `dcpandas/examples` for
+When you are ready to use the API, you can refer to `dcpandas/examples` for
 examples on how to use this package to perform various tasks. More tutorials and
-documentation can be found at [tutorials](https://datacommons.org/colab)!
+documentation can be found on our [tutorials page](https://datacommons.org/colab)!
 
 ## About Data Commons
 

From d3a618d53da1cb1a32b8e49eebd6c1855051e13e Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Mon, 24 Aug 2020 14:19:21 -0700
Subject: [PATCH 21/35] Fix bug from reassigning parameter time_series value in
 _group_stat_all_by_obs_options.

---
 dcpandas/df_builder.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/dcpandas/df_builder.py b/dcpandas/df_builder.py
index 64ef8395..dc035746 100644
--- a/dcpandas/df_builder.py
+++ b/dcpandas/df_builder.py
@@ -70,7 +70,7 @@ def _group_stat_all_by_obs_options(places, stat_vars, time_series=True):
             if not stat_var_data:
                 continue
             for source_series in stat_var_data['sourceSeries']:
-                time_series = source_series['val']
+                series = source_series['val']
                 # Create a hashable for Observation options.
                 obs_options = (('measurementMethod',
                                 source_series.get('measurementMethod')),
@@ -80,14 +80,13 @@ def _group_stat_all_by_obs_options(places, stat_vars, time_series=True):
                                ('scalingFactor',
                                 source_series.get('scalingFactor')))
                 if time_series:
-                    res[obs_options].append(
-                        dict({'place': place}, **time_series))
+                    res[obs_options].append(dict({'place': place}, **series))
                 else:
-                    date = max(time_series)
+                    date = max(series)
                     res[stat_var][obs_options].append({
                         'place': place,
                         'date': date,
-                        'val': time_series[date]
+                        'val': series[date]
                     })
     if time_series:
         return dict(res)
@@ -136,6 +135,7 @@ def _time_series_pd_input(places, stat_var):
         else:
             # Do not compute tiebreaker stats if not in most_geos.
             continue
+
         for row in rows:
             dates = set(row.keys())
             dates.remove('place')

From a4bcf4efb9786f108f48aead227dd1b3c6ba0f8f Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Mon, 24 Aug 2020 14:19:47 -0700
Subject: [PATCH 22/35] Make df_builder examples more readable.

---
 dcpandas/examples/df_builder.py | 48 ++++++++++++++-------------------
 1 file changed, 20 insertions(+), 28 deletions(-)

diff --git a/dcpandas/examples/df_builder.py b/dcpandas/examples/df_builder.py
index ab72ae73..aabe6519 100644
--- a/dcpandas/examples/df_builder.py
+++ b/dcpandas/examples/df_builder.py
@@ -22,41 +22,33 @@
 
 def main():
 
-    print('\nBuild a pd.Series of time series for one variable and one place.')
-    print('dcpd.build_time_series("country/CAN", "Count_WildlandFireEvent")')
-    print('>>> ')
-    print(dcpd.build_time_series("country/CAN", "Count_WildlandFireEvent"))
+    print("""
+# Build a pd.Series of time series for one variable and one place.
+$ dcpd.build_time_series("country/CAN", "Count_WildlandFireEvent")
+{}""".format(dcpd.build_time_series("country/CAN", "Count_WildlandFireEvent")))
 
-    print(
-        '\nBuild a DataFrame of time series for one variable in multiple places.'
-    )
-    print(
-        'dcpd.build_time_series_dataframe(["geoId/29", "geoId/33"], "Median_Income_Person")'
-    )
-    print('>>> ')
-    print(
+    print("""
+# Build a DataFrame of time series for one variable in multiple places.
+$ dcpd.build_time_series_dataframe(["geoId/29", "geoId/33"], "Median_Income_Person")
+{}""".format(
         dcpd.build_time_series_dataframe(
-            ["geoId/33", "geoId/29", "country/USA"], "Median_Income_Person"))
-    print(
-        'dcpd.build_time_series_dataframe(["geoId/29", "geoId/33"], "CumulativeCount_MedicalConditionIncident_COVID_19_PatientDeceased")'
-    )
-    print('>>> ')
-    print(
+            ["geoId/33", "geoId/29", "country/USA"], "Median_Income_Person")))
+
+    print("""
+# Build a DataFrame of time series with columns sorted in descending order.
+$ dcpd.build_time_series_dataframe(["country/USA"], "Median_Income_Person", desc_col=True)
+{}""".format(
         dcpd.build_time_series_dataframe(["country/USA"],
                                          "Median_Income_Person",
-                                         desc_col=True))
-    print(
-        '\nBuild a DataFrame of latest observations for multiple variables in multiple places.'
-    )
+                                         desc_col=True)))
 
-    print(
-        'dcpd.build_covariate_dataframe(["geoId/06", "country/FRA"], ["Median_Age_Person", "Count_Person"])'
-    )
-    print('>>> ')
-    print(
+    print("""
+# Build a DataFrame of latest observations for multiple variables in multiple places.
+$ dcpd.build_covariate_dataframe(["geoId/06", "country/FRA"], ["Median_Age_Person", "Count_Person", "Count_Household"])
+{}""".format(
         dcpd.build_covariate_dataframe(
             ["geoId/06", "country/FRA"],
-            ["Median_Age_Person", "Count_Person", "Count_Household"]))
+            ["Median_Age_Person", "Count_Person", "Count_Household"])))
 
     print('\n\nExpect 4 errors, starting HERE:')
     try:

From a2202c08f20af03eddd6fc7e0e07c0ccd836b8df Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Mon, 24 Aug 2020 14:23:54 -0700
Subject: [PATCH 23/35] Update the docstrings for both PyPI release setup*.py
 files. Change dcpandas to datacommons_pandas.

---
 setup.py          | 2 +-
 setup_dcpandas.py | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/setup.py b/setup.py
index 68d1016d..f266ac52 100644
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Install datacommons."""
+"""Build and distribute the datacommons package to PyPI."""
 from setuptools import setup
 
 with open('README.md', 'r') as fh:
diff --git a/setup_dcpandas.py b/setup_dcpandas.py
index 14dbbe33..79741bf3 100644
--- a/setup_dcpandas.py
+++ b/setup_dcpandas.py
@@ -1,4 +1,4 @@
-# Copyright 2017 Google Inc.
+# Copyright 2020 Google Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,14 +11,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Install dcpandas."""
-from setuptools import setup, find_packages
+"""Build and distribute the datacommons_pandas package to PyPI."""
+from setuptools import setup
 
 with open('README.md', 'r') as fh:
     long_description = fh.read()
 
 # Package metadata.
-NAME = 'dcpandas'
+NAME = 'datacommons_pandas'
 DESCRIPTION = 'A library to create pandas objects using the Data Commons Python API.'
 URL = 'https://github.com/datacommonsorg/api-python'
 EMAIL = 'support@datacommons.org'

From 0ebb20fd267600904aa94c1c4c4da17cf24bbb2a Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Mon, 24 Aug 2020 15:52:27 -0700
Subject: [PATCH 24/35] Rename time_series parameter to keep_series for
 _group_stat_all_by_obs_options.

---
 dcpandas/df_builder.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/dcpandas/df_builder.py b/dcpandas/df_builder.py
index dc035746..74160755 100644
--- a/dcpandas/df_builder.py
+++ b/dcpandas/df_builder.py
@@ -39,13 +39,13 @@ def build_time_series(place, stat_var):
     return pd.Series(dc.get_stat_series(place, stat_var))
 
 
-def _group_stat_all_by_obs_options(places, stat_vars, time_series=True):
+def _group_stat_all_by_obs_options(places, stat_vars, keep_series=True):
     """Groups the result of `get_stat_all` by Observation options for time series or covariates.
     
     Args:
       places (`str` or `iterable` of `str`): The dcids of Places to query for.
       stat_vars (`Iterable` of `str`): The dcids of the StatisticalVariables.
-      mode (`boolean`): if True, output time series grouped by Observation options; if False, output latest Observation grouped by Observation options.
+      keep_series (`boolean`): if True, output time series grouped by Observation options; if False, output latest Observation grouped by Observation options.
     Returns:
       A pandas Series with Place IDs as the index, and Observed statistics as values.
 
@@ -53,10 +53,10 @@ def _group_stat_all_by_obs_options(places, stat_vars, time_series=True):
       ValueError: If the payload returned by the Data Commons REST API is
         malformed.
     """
-    if time_series:
+    if keep_series:
         if len(stat_vars) != 1:
             raise ValueError(
-                'When `time_series` is set, only one StatisticalVariable for `stat_vars` is allowed.'
+                'When `keep_series` is set, only one StatisticalVariable for `stat_vars` is allowed.'
             )
         res = collections.defaultdict(list)
     else:
@@ -79,7 +79,7 @@ def _group_stat_all_by_obs_options(places, stat_vars, time_series=True):
                                ('unit', source_series.get('unit')),
                                ('scalingFactor',
                                 source_series.get('scalingFactor')))
-                if time_series:
+                if keep_series:
                     res[obs_options].append(dict({'place': place}, **series))
                 else:
                     date = max(series)
@@ -88,7 +88,7 @@ def _group_stat_all_by_obs_options(places, stat_vars, time_series=True):
                         'date': date,
                         'val': series[date]
                     })
-    if time_series:
+    if keep_series:
         return dict(res)
     else:
         return {k: dict(v) for k, v in res.items()}
@@ -117,7 +117,7 @@ def _time_series_pd_input(places, stat_var):
     """
 
     rows_dict = _group_stat_all_by_obs_options(places, [stat_var],
-                                               time_series=True)
+                                               keep_series=True)
     most_geos = []
     max_geos_so_far = 0
     latest_date = []
@@ -209,7 +209,7 @@ def _covariate_pd_input(places, stat_vars):
 
     rows_dict = _group_stat_all_by_obs_options(places,
                                                stat_vars,
-                                               time_series=False)
+                                               keep_series=False)
     place2cov = collections.defaultdict(dict)  # {geo: {var1: 3, var2: 33}}
 
     for stat_var, candidates_dict in rows_dict.items():

From efd2e0c9834909a20425e035d0aa3e22724e1c53 Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Mon, 24 Aug 2020 19:33:00 -0700
Subject: [PATCH 25/35] dcpandas to datacommons_pandas, including all
 datacommons functions

---
 datacommons/__init__.py                       |  2 +-
 {dcpandas => datacommons_pandas}/CHANGELOG.md |  0
 {dcpandas => datacommons_pandas}/README.md    |  0
 datacommons_pandas/__init__.py                | 27 +++++++++
 datacommons_pandas/core.py                    |  1 +
 .../df_builder.py                             |  2 +-
 .../examples/__init__.py                      |  0
 .../examples/df_builder.py                    |  2 +-
 datacommons_pandas/places.py                  |  1 +
 datacommons_pandas/populations.py             |  1 +
 datacommons_pandas/query.py                   |  1 +
 {dcpandas => datacommons_pandas}/stat_vars.py |  0
 .../test/__init__.py                          |  0
 .../test/df_builder_test.py                   |  4 +-
 {dcpandas => datacommons_pandas}/utils.py     |  0
 dcpandas/__init__.py                          | 16 -----
 dcpandas/setup.py                             | 59 -------------------
 setup.py => setup_datacommons.py              |  0
 ...dcpandas.py => setup_datacommons_pandas.py |  6 +-
 19 files changed, 39 insertions(+), 83 deletions(-)
 rename {dcpandas => datacommons_pandas}/CHANGELOG.md (100%)
 rename {dcpandas => datacommons_pandas}/README.md (100%)
 create mode 100644 datacommons_pandas/__init__.py
 create mode 120000 datacommons_pandas/core.py
 rename {dcpandas => datacommons_pandas}/df_builder.py (99%)
 rename {dcpandas => datacommons_pandas}/examples/__init__.py (100%)
 rename {dcpandas => datacommons_pandas}/examples/df_builder.py (98%)
 create mode 120000 datacommons_pandas/places.py
 create mode 120000 datacommons_pandas/populations.py
 create mode 120000 datacommons_pandas/query.py
 rename {dcpandas => datacommons_pandas}/stat_vars.py (100%)
 rename {dcpandas => datacommons_pandas}/test/__init__.py (100%)
 rename {dcpandas => datacommons_pandas}/test/df_builder_test.py (99%)
 rename {dcpandas => datacommons_pandas}/utils.py (100%)
 delete mode 100644 dcpandas/__init__.py
 delete mode 100644 dcpandas/setup.py
 rename setup.py => setup_datacommons.py (100%)
 rename setup_dcpandas.py => setup_datacommons_pandas.py (92%)

diff --git a/datacommons/__init__.py b/datacommons/__init__.py
index da2f9fc4..3d7384d5 100644
--- a/datacommons/__init__.py
+++ b/datacommons/__init__.py
@@ -22,4 +22,4 @@
 from datacommons.stat_vars import get_stat_value, get_stat_series, get_stat_all
 
 # Other utilities
-from .utils import set_api_key
+from datacommons.utils import set_api_key
diff --git a/dcpandas/CHANGELOG.md b/datacommons_pandas/CHANGELOG.md
similarity index 100%
rename from dcpandas/CHANGELOG.md
rename to datacommons_pandas/CHANGELOG.md
diff --git a/dcpandas/README.md b/datacommons_pandas/README.md
similarity index 100%
rename from dcpandas/README.md
rename to datacommons_pandas/README.md
diff --git a/datacommons_pandas/__init__.py b/datacommons_pandas/__init__.py
new file mode 100644
index 00000000..2c0e7268
--- /dev/null
+++ b/datacommons_pandas/__init__.py
@@ -0,0 +1,27 @@
+# Copyright 2017 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Data Commons SPARQL query support
+from datacommons_pandas.query import query
+
+# Data Commons Python API
+from datacommons_pandas.core import get_property_labels, get_property_values, get_triples
+from datacommons_pandas.places import get_places_in, get_related_places, get_stats
+from datacommons_pandas.populations import get_populations, get_observations, get_pop_obs, get_place_obs
+from datacommons_pandas.stat_vars import get_stat_value, get_stat_series, get_stat_all
+
+# Other utilities
+from datacommons_pandas.utils import set_api_key
+
+from datacommons_pandas.df_builder import build_time_series, build_time_series_dataframe, build_covariate_dataframe
diff --git a/datacommons_pandas/core.py b/datacommons_pandas/core.py
new file mode 120000
index 00000000..15f455cf
--- /dev/null
+++ b/datacommons_pandas/core.py
@@ -0,0 +1 @@
+../datacommons/core.py
\ No newline at end of file
diff --git a/dcpandas/df_builder.py b/datacommons_pandas/df_builder.py
similarity index 99%
rename from dcpandas/df_builder.py
rename to datacommons_pandas/df_builder.py
index 74160755..c965a973 100644
--- a/dcpandas/df_builder.py
+++ b/datacommons_pandas/df_builder.py
@@ -24,7 +24,7 @@
 import pandas as pd
 import six
 
-import datacommons.stat_vars as dc
+import datacommons_pandas.stat_vars as dc
 
 
 def build_time_series(place, stat_var):
diff --git a/dcpandas/examples/__init__.py b/datacommons_pandas/examples/__init__.py
similarity index 100%
rename from dcpandas/examples/__init__.py
rename to datacommons_pandas/examples/__init__.py
diff --git a/dcpandas/examples/df_builder.py b/datacommons_pandas/examples/df_builder.py
similarity index 98%
rename from dcpandas/examples/df_builder.py
rename to datacommons_pandas/examples/df_builder.py
index aabe6519..2939e0e5 100644
--- a/dcpandas/examples/df_builder.py
+++ b/datacommons_pandas/examples/df_builder.py
@@ -17,7 +17,7 @@
 from __future__ import division
 from __future__ import print_function
 
-import dcpandas as dcpd
+import datacommons_pandas as dcpd
 
 
 def main():
diff --git a/datacommons_pandas/places.py b/datacommons_pandas/places.py
new file mode 120000
index 00000000..7206307a
--- /dev/null
+++ b/datacommons_pandas/places.py
@@ -0,0 +1 @@
+../datacommons/places.py
\ No newline at end of file
diff --git a/datacommons_pandas/populations.py b/datacommons_pandas/populations.py
new file mode 120000
index 00000000..3e74c37b
--- /dev/null
+++ b/datacommons_pandas/populations.py
@@ -0,0 +1 @@
+../datacommons/populations.py
\ No newline at end of file
diff --git a/datacommons_pandas/query.py b/datacommons_pandas/query.py
new file mode 120000
index 00000000..d7db3c39
--- /dev/null
+++ b/datacommons_pandas/query.py
@@ -0,0 +1 @@
+../datacommons/query.py
\ No newline at end of file
diff --git a/dcpandas/stat_vars.py b/datacommons_pandas/stat_vars.py
similarity index 100%
rename from dcpandas/stat_vars.py
rename to datacommons_pandas/stat_vars.py
diff --git a/dcpandas/test/__init__.py b/datacommons_pandas/test/__init__.py
similarity index 100%
rename from dcpandas/test/__init__.py
rename to datacommons_pandas/test/__init__.py
diff --git a/dcpandas/test/df_builder_test.py b/datacommons_pandas/test/df_builder_test.py
similarity index 99%
rename from dcpandas/test/df_builder_test.py
rename to datacommons_pandas/test/df_builder_test.py
index 20e3ab76..f917bb9b 100644
--- a/dcpandas/test/df_builder_test.py
+++ b/datacommons_pandas/test/df_builder_test.py
@@ -25,8 +25,8 @@
 except ImportError:
     from mock import patch
 
-import dcpandas.df_builder as dcpd
-import dcpandas.utils as utils
+import datacommons_pandas.df_builder as dcpd
+import datacommons_pandas.utils as utils
 import json
 import unittest
 import six
diff --git a/dcpandas/utils.py b/datacommons_pandas/utils.py
similarity index 100%
rename from dcpandas/utils.py
rename to datacommons_pandas/utils.py
diff --git a/dcpandas/__init__.py b/dcpandas/__init__.py
deleted file mode 100644
index 4a34cabb..00000000
--- a/dcpandas/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright 2020 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from dcpandas.stat_vars import get_stat_value, get_stat_series, get_stat_all
-from dcpandas.df_builder import build_time_series, build_time_series_dataframe, build_covariate_dataframe
diff --git a/dcpandas/setup.py b/dcpandas/setup.py
deleted file mode 100644
index fd94ca0e..00000000
--- a/dcpandas/setup.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright 2017 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Install dcpandas."""
-from setuptools import setup, find_packages
-
-with open('README.md', 'r') as fh:
-    long_description = fh.read()
-
-# Package metadata.
-NAME = 'dcpandas'
-DESCRIPTION = 'A library to create pandas objects using the Data Commons Python API.'
-URL = 'https://github.com/datacommonsorg/api-python'
-EMAIL = 'support@datacommons.org'
-AUTHOR = 'datacommons.org'
-REQUIRES_PYTHON = '>=2.7'
-VERSION = '0.0.1'
-
-REQUIRED = [
-    'six',
-    'pandas',
-]
-
-setup(
-    name=NAME,
-    version=VERSION,
-    description=DESCRIPTION,
-    long_description=long_description,
-    long_description_content_type='text/markdown',
-    author=AUTHOR,
-    author_email=EMAIL,
-    maintainer=AUTHOR,
-    maintainer_email=EMAIL,
-    python_requires=REQUIRES_PYTHON,
-    url=URL,
-    packages=find_packages(),
-    install_requires=REQUIRED,
-    include_package_data=True,
-    license='Apache 2.0',
-    classifiers=[
-        'Intended Audience :: Developers',
-        'License :: OSI Approved :: Apache Software License',
-        'Programming Language :: Python',
-        'Programming Language :: Python :: 2.7',
-        'Programming Language :: Python :: 3.6',
-        'Programming Language :: Python :: Implementation :: CPython',
-        'Topic :: Software Development',
-    ],
-)
diff --git a/setup.py b/setup_datacommons.py
similarity index 100%
rename from setup.py
rename to setup_datacommons.py
diff --git a/setup_dcpandas.py b/setup_datacommons_pandas.py
similarity index 92%
rename from setup_dcpandas.py
rename to setup_datacommons_pandas.py
index 79741bf3..f59a49d7 100644
--- a/setup_dcpandas.py
+++ b/setup_datacommons_pandas.py
@@ -14,7 +14,7 @@
 """Build and distrubute the datacommons_pandas package to PyPI."""
 from setuptools import setup
 
-with open('README.md', 'r') as fh:
+with open('datacommons_pandas/README.md', 'r') as fh:
     long_description = fh.read()
 
 # Package metadata.
@@ -31,8 +31,8 @@
     'pandas',
 ]
 
-PACKAGES = ['dcpandas']
-PACKAGE_DIR = {'dcpandas': 'dcpandas'}
+PACKAGES = ['datacommons_pandas']
+PACKAGE_DIR = {'datacommons_pandas': 'datacommons_pandas'}
 setup(
     name=NAME,
     version=VERSION,

From f645f3fefb30a0867175687fe8e312b5e12c3098 Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Mon, 24 Aug 2020 20:02:56 -0700
Subject: [PATCH 26/35] Fix various docstrings.

---
 datacommons/__init__.py           |  8 ++++++++
 datacommons/examples/stat_vars.py |  2 +-
 datacommons_pandas/README.md      |  8 ++++----
 datacommons_pandas/__init__.py    |  9 +++++++--
 datacommons_pandas/df_builder.py  | 32 +++++++++++++++++--------------
 setup_datacommons.py              |  2 +-
 setup_datacommons_pandas.py       |  2 +-
 7 files changed, 40 insertions(+), 23 deletions(-)

diff --git a/datacommons/__init__.py b/datacommons/__init__.py
index 3d7384d5..58903a6b 100644
--- a/datacommons/__init__.py
+++ b/datacommons/__init__.py
@@ -12,6 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+################################## IMPORTANT #################################
+# All user-facing functions in this package must be simlinked to the         #
+# datacommons_pandas pkg. This is so that users do not need to import both   #
+# libraries for pd support. Please keep the below imports in sync with the   #
+# __init__.py in the datacommons_pandas/ dir, and add a simlink when         #
+# creating a new file.                                                       #
+##############################################@################################
+
 # Data Commons SPARQL query support
 from datacommons.query import query
 
diff --git a/datacommons/examples/stat_vars.py b/datacommons/examples/stat_vars.py
index 8ec77a1b..73f353f6 100644
--- a/datacommons/examples/stat_vars.py
+++ b/datacommons/examples/stat_vars.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Basic examples for StatisticalVariable-based param_set Data Commons API functions."""
+"""Basic examples for StatisticalVariable-based Data Commons API functions."""
 
 from __future__ import absolute_import
 from __future__ import division
diff --git a/datacommons_pandas/README.md b/datacommons_pandas/README.md
index 779e247f..135e8224 100644
--- a/datacommons_pandas/README.md
+++ b/datacommons_pandas/README.md
@@ -4,16 +4,16 @@ This is a Python library for creating pandas objects with data in the
 Data Commons Graph.
 To get started, install this package from pip.
 
-    pip install dcpandas
+    pip install datacommons_pandas
 
-Once the package is installed, import `dcpandas`.
+Once the package is installed, import `datacommons_pandas`.
 
-    import dcpandas as dcpd
+    import datacommons_pandas as dcpd
 
 For more detail on getting started with the API, please visit our
 [API Overview](http://docs.datacommons.org/api/).
 
-When you are ready to use the API, you can refer to `dcpandas/examples` for
+When you are ready to use the API, you can refer to `datacommons_pandas/examples` for
 examples on how to use this package to perform various tasks. More tutorials and
 documentation can be found on our [tutorials page](https://datacommons.org/colab)!
 
diff --git a/datacommons_pandas/__init__.py b/datacommons_pandas/__init__.py
index 2c0e7268..0bc6d75d 100644
--- a/datacommons_pandas/__init__.py
+++ b/datacommons_pandas/__init__.py
@@ -12,6 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from datacommons_pandas.df_builder import build_time_series, build_time_series_dataframe, build_covariate_dataframe
+
+################################ SIMLINK FILES ################################
+# We include simlinks to all user-facing functions from the datacommons pkg.  #
+# This is so that users do not need to import both libraries for pd support.  #
+# Please keep the below in sync with the __init__.py in the datacommons/ dir  #
+##############################################@################################
 # Data Commons SPARQL query support
 from datacommons_pandas.query import query
 
@@ -23,5 +30,3 @@
 
 # Other utilities
 from datacommons_pandas.utils import set_api_key
-
-from datacommons_pandas.df_builder import build_time_series, build_time_series_dataframe, build_covariate_dataframe
diff --git a/datacommons_pandas/df_builder.py b/datacommons_pandas/df_builder.py
index c965a973..93ccbdef 100644
--- a/datacommons_pandas/df_builder.py
+++ b/datacommons_pandas/df_builder.py
@@ -34,20 +34,23 @@ def build_time_series(place, stat_var):
       place (`str`): The dcid of Place to query for.
       stat_var (`str`): The dcid of the StatisticalVariable.
     Returns:
-      A pandas Series with Place IDs as the index, and Observed statistics as values.
+      A pandas Series with Place IDs as the index, and observed statistics as values.
     """
     return pd.Series(dc.get_stat_series(place, stat_var))
 
 
 def _group_stat_all_by_obs_options(places, stat_vars, keep_series=True):
-    """Groups the result of `get_stat_all` by Observation options for time series or covariates.
+    """Groups the result of `get_stat_all` by StatVarObservation options for time series or covariates.
     
     Args:
       places (`str` or `iterable` of `str`): The dcids of Places to query for.
       stat_vars (`Iterable` of `str`): The dcids of the StatisticalVariables.
-      keep_series (`boolean`): if True, output time series grouped by Observation options; if False, output latest Observation grouped by Observation options.
+      keep_series (`boolean`): if True, output time series grouped by
+        StatVarObservation options; if False, output latest statistics grouped
+        by StatVarObservation options.
     Returns:
-      A pandas Series with Place IDs as the index, and Observed statistics as values.
+      A `dict` (not a pandas object) holding the queried statistics grouped by
+      StatVarObservation options.
 
     Raises:
       ValueError: If the payload returned by the Data Commons REST API is
@@ -97,7 +100,7 @@ def _group_stat_all_by_obs_options(places, stat_vars, keep_series=True):
 def _time_series_pd_input(places, stat_var):
     """Returns a `list` of `dict` per element of `places` based on the `stat_var`.
 
-    Data Commons will pick a set of Observation options that covers the
+    Data Commons will pick a set of StatVarObservation options that covers the
     maximum number of queried places. Among ties, Data Commons selects an option
     set with the latest Observation.
 
@@ -154,10 +157,10 @@ def build_time_series_dataframe(places, stat_var, desc_col=False):
     """Constructs a pandas DataFrame with `places` as the index and dates of the time series as the columns.
 
     To ensure statistics are comparable across all Places, when multiple
-    StatVarObservations are available for Place and StatVar combos, Data
-    Commons selects the Observation options that covers the most Places, and breaks
-    ties using the Observation options that yield the latest Observation for any
-    Place.
+    StatVarObservations options are available for Place and StatVar combos,
+    Data Commons selects the StatVarObservation options that covers the most
+    Places, and breaks ties using the StatVarObservation options that yield
+    the latest Observation for any Place.
     
     Args:
       places (`str` or `iterable` of `str`): The dcids of Places to query for.
@@ -200,7 +203,8 @@ def _covariate_pd_input(places, stat_vars):
       the time series and place identifier.
 
     Examples:
-      >>> _covariate_pd_input(["geoId/29", "geoId/33"], ["Count_Person", "Median_Income_Person"])
+      >>> _covariate_pd_input(["geoId/29", "geoId/33"],
+                              ["Count_Person", "Median_Income_Person"])
           [
             {'Count_Person': 20, 'Median_Income_Person': 40, 'place': 'geoId/29'},
             {'Count_Person': 428, 'Median_Income_Person': 429, 'place': 'geoId/33'}
@@ -254,10 +258,10 @@ def build_covariate_dataframe(places, stat_vars):
     """Constructs a pandas DataFrame with `places` as the index and `stat_vars` as the columns.
 
     To ensure statistics are comparable across all Places, when multiple
-    StatVarObservations are available for Place and StatVar combos, Data
-    Commons selects the Observation options that covers the most Places, and breaks
-    ties using the Observation options that yield the latest Observation for any
-    Place.
+    StatVarObservations options are available for Place and StatVar combos,
+    Data Commons selects the StatVarObservation options that covers the most
+    Places, and breaks ties using the StatVarObservation options that yield
+    the latest Observation for any Place.
     
     Args:
       places (`str` or `iterable` of `str`): The dcids of Places to query for.
diff --git a/setup_datacommons.py b/setup_datacommons.py
index f266ac52..bf3efd64 100644
--- a/setup_datacommons.py
+++ b/setup_datacommons.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Build and distrubute the datacommons package to PyPI."""
+"""Build and distribute the datacommons package to PyPI."""
 from setuptools import setup
 
 with open('README.md', 'r') as fh:
diff --git a/setup_datacommons_pandas.py b/setup_datacommons_pandas.py
index f59a49d7..ec6034bd 100644
--- a/setup_datacommons_pandas.py
+++ b/setup_datacommons_pandas.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Build and distrubute the datacommons_pandas package to PyPI."""
+"""Build and distribute the datacommons_pandas package to PyPI."""
 from setuptools import setup
 
 with open('datacommons_pandas/README.md', 'r') as fh:

From 18cb93e0749a32a8a8d8051c86b48c81f1042d13 Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Tue, 25 Aug 2020 07:54:29 -0700
Subject: [PATCH 27/35] Add optional args to pandas lib build_time_series to
 pass onto python get_stat_series.

---
 datacommons_pandas/df_builder.py          | 11 +++++++++--
 datacommons_pandas/examples/df_builder.py |  5 +++++
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/datacommons_pandas/df_builder.py b/datacommons_pandas/df_builder.py
index 93ccbdef..501ecb5a 100644
--- a/datacommons_pandas/df_builder.py
+++ b/datacommons_pandas/df_builder.py
@@ -27,7 +27,12 @@
 import datacommons_pandas.stat_vars as dc
 
 
-def build_time_series(place, stat_var):
+def build_time_series(place,
+                      stat_var,
+                      measurement_method=None,
+                      observation_period=None,
+                      unit=None,
+                      scaling_factor=None):
     """Constructs a pandas Series with `dates` as the index and corresponding `stat_var` statistics as values.
     
     Args:
@@ -36,7 +41,9 @@ def build_time_series(place, stat_var):
     Returns:
       A pandas Series with Place IDs as the index, and observed statistics as values.
     """
-    return pd.Series(dc.get_stat_series(place, stat_var))
+    return pd.Series(
+        dc.get_stat_series(place, stat_var, measurement_method,
+                           observation_period, unit, scaling_factor))
 
 
 def _group_stat_all_by_obs_options(places, stat_vars, keep_series=True):
diff --git a/datacommons_pandas/examples/df_builder.py b/datacommons_pandas/examples/df_builder.py
index 2939e0e5..735b1d5a 100644
--- a/datacommons_pandas/examples/df_builder.py
+++ b/datacommons_pandas/examples/df_builder.py
@@ -25,6 +25,11 @@ def main():
     print("""
 # Build a pd.Series of time series for one variable and one place.
 $ dcpd.build_time_series("country/CAN", "Count_WildlandFireEvent")
+{}""".format(dcpd.build_time_series("country/CAN", "Count_WildlandFireEvent")))
+
+    print("""
+# Build a pd.Series of time series for one variable and one place and optional args.
+$ dcpd.build_time_series("country/USA", "Count_Person", "CensusPEPSurvey")
 {}""".format(dcpd.build_time_series("country/CAN", "Count_WildlandFireEvent")))
 
     print("""

From 51582c83297fcc9f608a2cf2a7744b6d0264f3d4 Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Tue, 25 Aug 2020 07:57:35 -0700
Subject: [PATCH 28/35] Update docstrings for time series funcs.

---
 datacommons/stat_vars.py         | 6 +++---
 datacommons_pandas/df_builder.py | 9 ++++++++-
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/datacommons/stat_vars.py b/datacommons/stat_vars.py
index bd2ba72f..dd6c2cee 100644
--- a/datacommons/stat_vars.py
+++ b/datacommons/stat_vars.py
@@ -72,9 +72,9 @@ def get_stat_value(place,
         url += '&scaling_factor={}'.format(scaling_factor)
 
     try:
-      res_json = utils._send_request(url, post=False, use_payload=False)
+        res_json = utils._send_request(url, post=False, use_payload=False)
     except ValueError:
-      raise ValueError('No data in response.')
+        raise ValueError('No data in response.')
     return res_json['value']
 
 
@@ -97,7 +97,7 @@ def get_stat_series(place,
       scaling_factor (`int`): Optional, the preferred `scalingFactor` value.
     Returns:
       A `dict` mapping dates to value of `stat_var` for `place`,
-      filtered by optional args.
+      representing a time series that satisfies all input parameters.
 
     Raises:
       ValueError: If the payload returned by the Data Commons REST API is
diff --git a/datacommons_pandas/df_builder.py b/datacommons_pandas/df_builder.py
index 501ecb5a..72889afd 100644
--- a/datacommons_pandas/df_builder.py
+++ b/datacommons_pandas/df_builder.py
@@ -38,8 +38,15 @@ def build_time_series(place,
     Args:
       place (`str`): The dcid of Place to query for.
       stat_var (`str`): The dcid of the StatisticalVariable.
+      measurement_method (`str`): Optional, the dcid of the preferred
+        `measurementMethod` value.
+      observation_period (`str`): Optional, the preferred
+        `observationPeriod` value.
+      unit (`str`): Optional, the dcid of the preferred `unit` value.
+      scaling_factor (`int`): Optional, the preferred `scalingFactor` value.
     Returns:
-      A pandas Series with Place IDs as the index, and observed statistics as values.
+      A pandas Series with dates as the index and observed statistics as
+      values, representing a time series satisfying all optional args.
     """
     return pd.Series(
         dc.get_stat_series(place, stat_var, measurement_method,

From a49a095d4d24cbc029c6aa5e04a381739e13ec88 Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Tue, 25 Aug 2020 09:33:04 -0700
Subject: [PATCH 29/35] Remove "will" from CHANGELOG.

---
 datacommons_pandas/CHANGELOG.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/datacommons_pandas/CHANGELOG.md b/datacommons_pandas/CHANGELOG.md
index 068f599a..c130193d 100644
--- a/datacommons_pandas/CHANGELOG.md
+++ b/datacommons_pandas/CHANGELOG.md
@@ -10,9 +10,9 @@
 
 Added pandas wrapper functions.
 
--   `build_time_series` will construct a pd.Series for a given StatisticalVariable and Place, where dates are the index for the time series.
--   `build_time_series_dataframe` will construct a pd.DataFrame for a given StatisticalVariable and a set of Places. The DataFrame will have Places as the index and dates as the columns.
--   `build_covariate_dataframe` will construct a covariate pd.DataFrame for a set of StatisticalVariables and a set of Places. The DataFrame will have  Places as index and StatisticalVariables as the columns. The values are the most recent values for the chosen StatVarObservation options.
+-   `build_time_series` constructs a pd.Series for a given StatisticalVariable and Place, where dates are the index for the time series.
+-   `build_time_series_dataframe` constructs a pd.DataFrame for a given StatisticalVariable and a set of Places. The DataFrame will have Places as the index and dates as the columns.
+-   `build_covariate_dataframe` constructs a covariate pd.DataFrame for a set of StatisticalVariables and a set of Places. The DataFrame will have  Places as index and StatisticalVariables as the columns. The values are the most recent values for the chosen StatVarObservation options.
 
 For multi-place functions, when a StatisticalVariable has multiple StatVarObservation options,
 Data Commons chooses a set of StatVarObservation options that covers the most geos. This

From 7f46fdd2a3fe9210927a4ec499f196ff388f585a Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Tue, 25 Aug 2020 09:41:02 -0700
Subject: [PATCH 30/35] Reference TODO for cloudbuild pandas-python sync check.
 Update changelog.

---
 datacommons/__init__.py         | 3 ++-
 datacommons_pandas/CHANGELOG.md | 2 +-
 datacommons_pandas/__init__.py  | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/datacommons/__init__.py b/datacommons/__init__.py
index 58903a6b..182ecc2e 100644
--- a/datacommons/__init__.py
+++ b/datacommons/__init__.py
@@ -18,7 +18,8 @@
 # libraries for pd support. Please keep the below imports in sync with the   #
 # __init__.py in the datacommons_pandas/ dir, and add a simlink when         #
 # creating a new file.                                                       #
-##############################################@################################
+# TODO: https://github.com/datacommonsorg/api-python/issues/149              #
+##############################################################################
 
 # Data Commons SPARQL query support
 from datacommons.query import query
diff --git a/datacommons_pandas/CHANGELOG.md b/datacommons_pandas/CHANGELOG.md
index c130193d..77ea2b0a 100644
--- a/datacommons_pandas/CHANGELOG.md
+++ b/datacommons_pandas/CHANGELOG.md
@@ -10,7 +10,7 @@
 
 Added pandas wrapper functions.
 
--   `build_time_series` constructs a pd.Series for a given StatisticalVariable and Place, where dates are the index for the time series.
+-   `build_time_series` constructs a pd.Series for a given StatisticalVariable and Place, where the time series are indexed by date.
 -   `build_time_series_dataframe` constructs a pd.DataFrame for a given StatisticalVariable and a set of Places. The DataFrame will have Places as the index and dates as the columns.
 -   `build_covariate_dataframe` constructs a covariate pd.DataFrame for a set of StatisticalVariables and a set of Places. The DataFrame will have  Places as index and StatisticalVariables as the columns. The values are the most recent values for the chosen StatVarObservation options.
 
diff --git a/datacommons_pandas/__init__.py b/datacommons_pandas/__init__.py
index 0bc6d75d..b68f5d65 100644
--- a/datacommons_pandas/__init__.py
+++ b/datacommons_pandas/__init__.py
@@ -18,6 +18,7 @@
 # We include simlinks to all user-facing functions from the datacommons pkg.  #
 # This is so that users do not need to import both libraries for pd support.  #
 # Please keep the below in sync with the __init__.py in the datacommons/ dir  #
+# TODO: enforce this. https://github.com/datacommonsorg/api-python/issues/149 #
 ##############################################@################################
 # Data Commons SPARQL query support
 from datacommons_pandas.query import query

From 87b13ec2c8373a0beef1d7c086c83e935728e509 Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Tue, 25 Aug 2020 12:50:00 -0700
Subject: [PATCH 31/35] Rename covariate* to multivariate*, address cyin's
 comments on df_builder; add test for raising no data error; fix various tests
 that were returning the type HTTPError instead of an instance of HTTPError.

---
 datacommons/__init__.py                    |  4 +-
 datacommons/test/core_test.py              |  2 +-
 datacommons/test/places_test.py            |  2 +-
 datacommons/test/stat_vars_test.py         |  2 +-
 datacommons/utils.py                       |  5 ++-
 datacommons_pandas/README.md               |  1 +
 datacommons_pandas/__init__.py             |  6 +--
 datacommons_pandas/df_builder.py           | 47 +++++++++++++---------
 datacommons_pandas/examples/df_builder.py  | 25 +++++++++---
 datacommons_pandas/test/df_builder_test.py | 28 +++++++++----
 10 files changed, 80 insertions(+), 42 deletions(-)

diff --git a/datacommons/__init__.py b/datacommons/__init__.py
index 182ecc2e..93d61ab4 100644
--- a/datacommons/__init__.py
+++ b/datacommons/__init__.py
@@ -13,10 +13,10 @@
 # limitations under the License.
 
 ################################## IMPORTANT #################################
-# All user-facing functions in this package must be simlinked to the         #
+# All user-facing functions in this package must be symlinked to the         #
 # datacommons_pandas pkg. This is so that users do not need to import both   #
 # libraries for pd support. Please keep the below imports in sync with the   #
-# __init__.py in the datacommons_pandas/ dir, and add a simlink when         #
+# __init__.py in the datacommons_pandas/ dir, and add a symlink when         #
 # creating a new file.                                                       #
 # TODO: https://github.com/datacommonsorg/api-python/issues/149              #
 ##############################################################################
diff --git a/datacommons/test/core_test.py b/datacommons/test/core_test.py
index 8dae3443..e64064a6 100644
--- a/datacommons/test/core_test.py
+++ b/datacommons/test/core_test.py
@@ -311,7 +311,7 @@ def read(self):
       return MockResponse(json.dumps({'payload': res_json}))
 
   # Otherwise, return an empty response and a 404.
-  return urllib.error.HTTPError
+  return urllib.error.HTTPError(None, 404, None, None, None)
 
 
 class TestGetPropertyLabels(unittest.TestCase):
diff --git a/datacommons/test/places_test.py b/datacommons/test/places_test.py
index 6c57a8cd..d4147655 100644
--- a/datacommons/test/places_test.py
+++ b/datacommons/test/places_test.py
@@ -212,7 +212,7 @@ def read(self):
     return MockResponse(json.dumps({'payload': res_json}))
 
   # Otherwise, return an empty response and a 404.
-  return urllib.error.HTTPError
+  return urllib.error.HTTPError(None, 404, None, None, None)
 
 class TestGetPlacesIn(unittest.TestCase):
   """ Unit stests for get_places_in. """
diff --git a/datacommons/test/stat_vars_test.py b/datacommons/test/stat_vars_test.py
index 613674a6..e7a5a44d 100644
--- a/datacommons/test/stat_vars_test.py
+++ b/datacommons/test/stat_vars_test.py
@@ -248,7 +248,7 @@ def read(self):
             return MockResponse(json.dumps(resp))
 
     # Otherwise, return an empty response and a 404.
-    return urllib.error.HTTPError
+    return urllib.error.HTTPError(None, 404, None, None, None)
 
 
 class TestGetStatValue(unittest.TestCase):
diff --git a/datacommons/utils.py b/datacommons/utils.py
index b0f855e5..502205e9 100644
--- a/datacommons/utils.py
+++ b/datacommons/utils.py
@@ -113,7 +113,10 @@ def _send_request(req_url, req_json={}, compress=False, post=True, use_payload=T
     raise ValueError(
         'Response error: An HTTP {} code was returned by the mixer. Printing '
         'response\n\n{}'.format(e.code, e.read()))
-
+  if isinstance(res, six.moves.urllib.error.HTTPError):
+      raise ValueError(
+          'Response error: An HTTP {} code was returned by the mixer. Printing '
+          'response\n\n{}'.format(res.code, res.msg))
   # Get the JSON
   res_json = json.loads(res.read())
   if not use_payload:
diff --git a/datacommons_pandas/README.md b/datacommons_pandas/README.md
index 135e8224..0386fec3 100644
--- a/datacommons_pandas/README.md
+++ b/datacommons_pandas/README.md
@@ -2,6 +2,7 @@
 
 This is a Python library for creating pandas objects with data in the
 Data Commons Graph.
+
 To get started, install this package from pip.
 
     pip install datacommons_pandas
diff --git a/datacommons_pandas/__init__.py b/datacommons_pandas/__init__.py
index b68f5d65..c19dca9a 100644
--- a/datacommons_pandas/__init__.py
+++ b/datacommons_pandas/__init__.py
@@ -12,10 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from datacommons_pandas.df_builder import build_time_series, build_time_series_dataframe, build_covariate_dataframe
+from datacommons_pandas.df_builder import build_time_series, build_time_series_dataframe, build_multivariate_dataframe
 
-################################ SIMLINK FILES ################################
-# We include simlinks to all user-facing functions from the datacommons pkg.  #
+################################ SYMLINK FILES ################################
+# We include symlinks to all user-facing functions from the datacommons pkg.  #
 # This is so that users do not need to import both libraries for pd support.  #
 # Please keep the below in sync with the __init__.py in the datacommons/ dir  #
 # TODO: enforce this. https://github.com/datacommonsorg/api-python/issues/149 #
diff --git a/datacommons_pandas/df_builder.py b/datacommons_pandas/df_builder.py
index 72889afd..635d7f31 100644
--- a/datacommons_pandas/df_builder.py
+++ b/datacommons_pandas/df_builder.py
@@ -54,7 +54,11 @@ def build_time_series(place,
 
 
 def _group_stat_all_by_obs_options(places, stat_vars, keep_series=True):
-    """Groups the result of `get_stat_all` by StatVarObservation options for time series or covariates.
+    """Groups the result of `get_stat_all` by StatVarObservation options for time series or multivariates.
+
+    Note that this function does not preserve `(place, stat_var)` pairs that
+    yield no data `from get_stat_all`. In the extreme case, no data
+    for any pairs will return an empty dict.
     
     Args:
       places (`str` or `iterable` of `str`): The dcids of Places to query for.
@@ -63,8 +67,9 @@ def _group_stat_all_by_obs_options(places, stat_vars, keep_series=True):
         StatVarObservation options; if False, output latest statistics grouped
         by StatVarObservation options.
     Returns:
-      A pandas Series with Place IDs as the index, and observed statistics as
-      values.
+      A nested dict mapping each StatisticalVariable in `stat_vars` to its
+      StatVarObservation options. In turn, each StatVarObservation option
+      maps to a list of rows, one per place, with the place id and stat data.
 
     Raises:
       ValueError: If the payload returned by the Data Commons REST API is
@@ -88,7 +93,7 @@ def _group_stat_all_by_obs_options(places, stat_vars, keep_series=True):
                 continue
             for source_series in stat_var_data['sourceSeries']:
                 series = source_series['val']
-                # Create a hashable for Observation options.
+                # Convert dict of SVO options into nested tuple (hashable key).
                 obs_options = (('measurementMethod',
                                 source_series.get('measurementMethod')),
                                ('observationPeriod',
@@ -105,6 +110,8 @@ def _group_stat_all_by_obs_options(places, stat_vars, keep_series=True):
                         'date': date,
                         'val': series[date]
                     })
+    if not res:
+        raise ValueError('No data for any of specified places and stat_vars.')
     if keep_series:
         return dict(res)
     else:
@@ -136,21 +143,22 @@ def _time_series_pd_input(places, stat_var):
     rows_dict = _group_stat_all_by_obs_options(places, [stat_var],
                                                keep_series=True)
     most_geos = []
-    max_geos_so_far = 0
+    max_geo_count_so_far = 0
     latest_date = []
     latest_date_so_far = ''
     for options, rows in rows_dict.items():
         current_geos = len(rows)
-        if current_geos > max_geos_so_far:
-            max_geos_so_far = current_geos
+        if current_geos > max_geo_count_so_far:
+            max_geo_count_so_far = current_geos
             most_geos = [options]
             # Reset tiebreaker stats. Recompute after this if-else block.
             latest_date = []
             latest_date_so_far = ''
-        elif current_geos == max_geos_so_far:
+        elif current_geos == max_geo_count_so_far:
             most_geos.append(options)
         else:
-            # Do not compute tiebreaker stats if not in most_geos.
+            # Do not compute tiebreaker stats if no change to most_geos.
+            # Skip to top of the for loop.
             continue
 
         for row in rows:
@@ -202,7 +210,7 @@ def build_time_series_dataframe(places, stat_var, desc_col=False):
     return df[sorted(df.columns, reverse=desc_col)]
 
 
-def _covariate_pd_input(places, stat_vars):
+def _multivariate_pd_input(places, stat_vars):
     """Returns a `list` of `dict` per element of `places` based on the `stat_var`.
 
     Data Commons will pick a set of StatVarObservation options that covers the
@@ -217,7 +225,7 @@ def _covariate_pd_input(places, stat_vars):
       the time series and place identifier.
 
     Examples:
-      >>> _covariate_pd_input(["geoId/29", "geoId/33"],
+      >>> _multivariate_pd_input(["geoId/29", "geoId/33"],
                               ["Count_Person", "Median_Income_Person"])
           [
             {'Count_Person': 20, 'Median_Income_Person': 40, 'place': 'geoId/29'},
@@ -233,22 +241,23 @@ def _covariate_pd_input(places, stat_vars):
     for stat_var, candidates_dict in rows_dict.items():
         selected_rows = None
         most_geos = []
-        max_geos_so_far = 0
+        max_geo_count_so_far = 0
         latest_date = []
         latest_date_so_far = ''
         for options, rows in candidates_dict.items():
             current_geos = len(rows)
-            if current_geos > max_geos_so_far:
-                max_geos_so_far = current_geos
+            if current_geos > max_geo_count_so_far:
+                max_geo_count_so_far = current_geos
                 most_geos = [options]
                 # Reset tiebreaker stats. Recompute after this if-else block.
                 latest_date = []
                 latest_date_so_far = ''
-            elif current_geos == max_geos_so_far:
+            elif current_geos == max_geo_count_so_far:
                 most_geos.append(options)
             else:
                 # Do not compute tiebreaker stats if not in most_geos.
                 continue
+
             for row in rows:
                 row_date = row['date']
                 if row_date > latest_date_so_far:
@@ -263,12 +272,12 @@ def _covariate_pd_input(places, stat_vars):
         for row in selected_rows:
             place2cov[row['place']][stat_var] = row['val']
     return [
-        dict({'place': place}, **covariates)
-        for place, covariates in place2cov.items()
+        dict({'place': place}, **multivariates)
+        for place, multivariates in place2cov.items()
     ]
 
 
-def build_covariate_dataframe(places, stat_vars):
+def build_multivariate_dataframe(places, stat_vars):
     """Constructs a pandas DataFrame with `places` as the index and `stat_vars` as the columns.
 
     To ensure statistics are comparable across all Places, when multiple
@@ -300,7 +309,7 @@ def build_covariate_dataframe(places, stat_vars):
         raise ValueError(
             'Parameter `places` and `stat_vars` must be string object or list-like object.'
         )
-    df = pd.DataFrame.from_records(_covariate_pd_input(places, stat_vars))
+    df = pd.DataFrame.from_records(_multivariate_pd_input(places, stat_vars))
     df.set_index('place', inplace=True)
     df.sort_index(inplace=True)
     return df
diff --git a/datacommons_pandas/examples/df_builder.py b/datacommons_pandas/examples/df_builder.py
index 735b1d5a..63fecfc9 100644
--- a/datacommons_pandas/examples/df_builder.py
+++ b/datacommons_pandas/examples/df_builder.py
@@ -49,13 +49,13 @@ def main():
 
     print("""
 # Build a DataFrame of latest observations for multiple variables in multiple places.
-$ dcpd.build_covariate_dataframe(["geoId/06", "country/FRA"], ["Median_Age_Person", "Count_Person", "Count_Household"])
+$ dcpd.build_multivariate_dataframe(["geoId/06", "country/FRA"], ["Median_Age_Person", "Count_Person", "Count_Household"])
 {}""".format(
-        dcpd.build_covariate_dataframe(
+        dcpd.build_multivariate_dataframe(
             ["geoId/06", "country/FRA"],
             ["Median_Age_Person", "Count_Person", "Count_Household"])))
 
-    print('\n\nExpect 4 errors, starting HERE:')
+    print('\n\nExpect 6 errors, starting HERE:')
     try:
         dcpd.build_time_series_dataframe(
             ["geoId/33"], ["Median_Income_Person", "Count_Person"])
@@ -66,12 +66,25 @@ def main():
     except ValueError as e:
         print("Successfully errored on: ", e)
     try:
-        dcpd.build_covariate_dataframe([3],
-                                       ["Median_Income_Person", "Count_Person"])
+        dcpd.build_multivariate_dataframe(
+            [3], ["Median_Income_Person", "Count_Person"])
     except ValueError as e:
         print("Successfully errored on: ", e)
     try:
-        dcpd.build_covariate_dataframe("country/USA", True)
+        dcpd.build_multivariate_dataframe("country/USA", True)
+    except ValueError as e:
+        print("Successfully errored on: ", e)
+    # If the following two do not error due to the addition of
+    # Median_Income_Person statistics for NUTS geos, then please
+    # replace either the places or the StatVar.
+    try:
+        dcpd.build_time_series_dataframe(['nuts/HU2', 'nuts/HU22'],
+                                         'Median_Income_Person')
+    except ValueError as e:
+        print("Successfully errored on: ", e)
+    try:
+        dcpd.build_multivariate_dataframe(['nuts/HU2', 'nuts/HU22'],
+                                          ['Median_Income_Person'])
     except ValueError as e:
         print("Successfully errored on: ", e)
     print('until HERE.')
diff --git a/datacommons_pandas/test/df_builder_test.py b/datacommons_pandas/test/df_builder_test.py
index f917bb9b..e686b16d 100644
--- a/datacommons_pandas/test/df_builder_test.py
+++ b/datacommons_pandas/test/df_builder_test.py
@@ -216,7 +216,7 @@ def read(self):
             }
             return MockResponse(json.dumps(resp))
     # Otherwise, return an empty response and a 404.
-    return urllib.error.HTTPError
+    return urllib.error.HTTPError(None, 404, None, None, None)
 
 
 class TestPdTimeSeries(unittest.TestCase):
@@ -253,14 +253,14 @@ def test_one_place(self, urlopen):
         self.assertEqual(rows, exp)
 
 
-class TestPdCovariates(unittest.TestCase):
-    """Unit tests for _covariate_pd_input."""
+class TestPdMultivariates(unittest.TestCase):
+    """Unit tests for _multivariate_pd_input."""
 
     @patch('six.moves.urllib.request.urlopen', side_effect=request_mock)
     def test_basic(self, urlopen):
-        """Calling _covariate_pd_input with proper args."""
-        rows = dcpd._covariate_pd_input(['geoId/06', 'nuts/HU22'],
-                                        ['Count_Person', 'Median_Age_Person'])
+        """Calling _multivariate_pd_input with proper args."""
+        rows = dcpd._multivariate_pd_input(
+            ['geoId/06', 'nuts/HU22'], ['Count_Person', 'Median_Age_Person'])
         exp = [{
             "place": "geoId/06",
             "Median_Age_Person": 24,
@@ -273,11 +273,23 @@ def test_basic(self, urlopen):
 
     @patch('six.moves.urllib.request.urlopen', side_effect=request_mock)
     def test_one_each(self, urlopen):
-        """Calling _covariate_pd_input with single place and var."""
-        rows = dcpd._covariate_pd_input(['geoId/06'], ['Count_Person'])
+        """Calling _multivariate_pd_input with single place and var."""
+        rows = dcpd._multivariate_pd_input(['geoId/06'], ['Count_Person'])
         exp = [{"place": "geoId/06", "Count_Person": 25090}]
         self.assertEqual(rows, exp)
 
+    @patch('six.moves.urllib.request.urlopen', side_effect=request_mock)
+    def test_no_data(self, urlopen):
+        """Error if there is no data."""
+        with self.assertRaises(ValueError):
+            dcpd._group_stat_all_by_obs_options(
+                ['FOO/100'], ['Count_Person', 'Median_Age_Person'])
+        with self.assertRaises(ValueError):
+            dcpd._time_series_pd_input(['FOO/100', 'BAR/200'], ['Count_Person'])
+        with self.assertRaises(ValueError):
+            dcpd._multivariate_pd_input(['FOO/100', 'BAR/200'],
+                                        ['Count_Person', 'Median_Age_Person'])
+
 
 if __name__ == '__main__':
     unittest.main()

From f116e42b08fe1882b5b940dbea216fead6326272 Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Tue, 25 Aug 2020 12:55:28 -0700
Subject: [PATCH 32/35] Update docstring for _group_stat_all_by_obs_options.

---
 datacommons_pandas/df_builder.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/datacommons_pandas/df_builder.py b/datacommons_pandas/df_builder.py
index 635d7f31..92f580c7 100644
--- a/datacommons_pandas/df_builder.py
+++ b/datacommons_pandas/df_builder.py
@@ -57,8 +57,8 @@ def _group_stat_all_by_obs_options(places, stat_vars, keep_series=True):
     """Groups the result of `get_stat_all` by StatVarObservation options for time series or multivariates.
 
     Note that this function does not preserve `(place, stat_var)` pairs that
-    yield no data `from get_stat_all`. In the extreme case, no data
-    for any pairs will return an empty dict.
+    yield no data from `get_stat_all`. In the extreme case that there is no
+    data for any pair, raises a ValueError instead of returning an empty dict.
     
     Args:
       places (`str` or `iterable` of `str`): The dcids of Places to query for.
@@ -73,7 +73,8 @@ def _group_stat_all_by_obs_options(places, stat_vars, keep_series=True):
 
     Raises:
       ValueError: If the payload returned by the Data Commons REST API is
-        malformed.
+        malformed, or if there is no data for any (Place, StatisticalVariable)
+        pair.
     """
     if keep_series:
         if len(stat_vars) != 1:

From 975c956aacbfdec979cf6166150af190233c105c Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Tue, 25 Aug 2020 15:20:54 -0700
Subject: [PATCH 33/35] Make the no-data error message in
 _group_stat_all_by_obs_options more general.

---
 datacommons_pandas/df_builder.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/datacommons_pandas/df_builder.py b/datacommons_pandas/df_builder.py
index 92f580c7..44dcb69f 100644
--- a/datacommons_pandas/df_builder.py
+++ b/datacommons_pandas/df_builder.py
@@ -112,7 +112,8 @@ def _group_stat_all_by_obs_options(places, stat_vars, keep_series=True):
                         'val': series[date]
                     })
     if not res:
-        raise ValueError('No data for any of specified places and stat_vars.')
+        raise ValueError(
+            'No data for any of specified Places and StatisticalVariables.')
     if keep_series:
         return dict(res)
     else:

From 9865e098225c88ba8140efe9b89b9bee0a22be36 Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Tue, 25 Aug 2020 17:02:48 -0700
Subject: [PATCH 34/35] Parameterize some pandas lib example functions.

---
 datacommons_pandas/examples/df_builder.py | 106 +++++++++++++++-------
 1 file changed, 75 insertions(+), 31 deletions(-)

diff --git a/datacommons_pandas/examples/df_builder.py b/datacommons_pandas/examples/df_builder.py
index 63fecfc9..6dff26bf 100644
--- a/datacommons_pandas/examples/df_builder.py
+++ b/datacommons_pandas/examples/df_builder.py
@@ -20,58 +20,95 @@
 import datacommons_pandas as dcpd
 
 
-def main():
+def build_time_series_example():
 
     print("""
 # Build a pd.Series of time series for one variable and one place.
-$ dcpd.build_time_series("country/CAN", "Count_WildlandFireEvent")
-{}""".format(dcpd.build_time_series("country/CAN", "Count_WildlandFireEvent")))
+$ dcpd.build_time_series('country/CAN', 'Count_WildlandFireEvent')
+{}""".format(dcpd.build_time_series('country/CAN', 'Count_WildlandFireEvent')))
 
     print("""
 # Build a pd.Series of time series for one variable and one place and optional args.
-$ dcpd.build_time_series("country/USA", "Count_Person", "CensusPEPSurvey")
-{}""".format(dcpd.build_time_series("country/CAN", "Count_WildlandFireEvent")))
-
-    print("""
-# Build a DataFrame of time series for one variable in multiple places.
-$ dcpd.build_time_series_dataframe(["geoId/29", "geoId/33"], "Median_Income_Person")
+$ dcpd.build_time_series('country/USA', 'Count_Person', 'CensusPEPSurvey')
 {}""".format(
-        dcpd.build_time_series_dataframe(
-            ["geoId/33", "geoId/29", "country/USA"], "Median_Income_Person")))
+        dcpd.build_time_series('country/USA', 'Count_Person',
+                               'CensusPEPSurvey')))
 
-    print("""
-# Build a DataFrame of time series with columns sorted in descending order.
-$ dcpd.build_time_series_dataframe(["country/USA"], "Median_Income_Person", desc_col=True)
-{}""".format(
-        dcpd.build_time_series_dataframe(["country/USA"],
-                                         "Median_Income_Person",
-                                         desc_col=True)))
 
-    print("""
-# Build a DataFrame of latest observations for multiple variables in multiple places.
-$ dcpd.build_multivariate_dataframe(["geoId/06", "country/FRA"], ["Median_Age_Person", "Count_Person", "Count_Household"])
-{}""".format(
-        dcpd.build_multivariate_dataframe(
-            ["geoId/06", "country/FRA"],
-            ["Median_Age_Person", "Count_Person", "Count_Household"])))
+def build_time_series_dataframe_example():
+
+    def demonstrate_build_time_series_dataframe(intro_str,
+                                                places,
+                                                stat_var,
+                                                desc_col=False):
+        arg_str = "{}, '{}'".format(places, stat_var)
+        if desc_col:
+            arg_str += ", desc_col=True"
+        print("""
+    # {}
+    $ dcpd.build_time_series_dataframe({})
+    {}""".format(intro_str, arg_str,
+                 dcpd.build_time_series_dataframe(places, stat_var, desc_col)))
+
+    build_time_series_dataframe_params = [{
+        'intro_str':
+            'Build a DataFrame of time series for one variable in multiple places.',
+        'places': ['geoId/33', 'geoId/29', 'country/USA'],
+        'stat_var':
+            'Median_Income_Person'
+    }, {
+        'intro_str':
+            'Build a DataFrame of time series with columns sorted in descending order.',
+        'places': ['country/USA'],
+        'stat_var':
+            'Median_Income_Person',
+        'desc_col':
+            True
+    }]
 
-    print('\n\nExpect 6 errors, starting HERE:')
+    for param_set in build_time_series_dataframe_params:
+        demonstrate_build_time_series_dataframe(**param_set)
+
+
+def build_multivariate_dataframe_example():
+
+    def demonstrate_build_multivariate_dataframe(intro_str, places, stat_vars):
+        print("""
+    # {}
+    $ dcpd.build_multivariate_dataframe({}, {})
+    {}""".format(intro_str, places, stat_vars,
+                 dcpd.build_multivariate_dataframe(places, stat_vars)))
+
+    build_multivariate_dataframe_params = [{
+        'intro_str':
+            'Build a DataFrame of latest observations for multiple variables in multiple places.',
+        'places': ['geoId/06', 'country/FRA'],
+        'stat_vars': ['Median_Age_Person', 'Count_Person', 'Count_Household']
+    }]
+
+    for param_set in build_multivariate_dataframe_params:
+        demonstrate_build_multivariate_dataframe(**param_set)
+
+
+def expect_err_examples():
+
+    print("\n\nExpect 6 errors, starting HERE:")
     try:
         dcpd.build_time_series_dataframe(
-            ["geoId/33"], ["Median_Income_Person", "Count_Person"])
+            ['geoId/33'], ['Median_Income_Person', 'Count_Person'])
     except ValueError as e:
         print("Successfully errored on: ", e)
     try:
-        dcpd.build_time_series_dataframe(24, ["Median_Income_Person"])
+        dcpd.build_time_series_dataframe(24, ['Median_Income_Person'])
     except ValueError as e:
         print("Successfully errored on: ", e)
     try:
         dcpd.build_multivariate_dataframe(
-            [3], ["Median_Income_Person", "Count_Person"])
+            [3], ['Median_Income_Person', 'Count_Person'])
     except ValueError as e:
         print("Successfully errored on: ", e)
     try:
-        dcpd.build_multivariate_dataframe("country/USA", True)
+        dcpd.build_multivariate_dataframe('country/USA', True)
     except ValueError as e:
         print("Successfully errored on: ", e)
     # If the following two do not error due to the addition of
@@ -87,7 +124,14 @@ def main():
                                           ['Median_Income_Person'])
     except ValueError as e:
         print("Successfully errored on: ", e)
-    print('until HERE.')
+    print("until HERE.")
+
+
+def main():
+    build_time_series_example()
+    build_time_series_dataframe_example()
+    build_multivariate_dataframe_example()
+    expect_err_examples()
 
 
 if __name__ == '__main__':

From 14dea408c8d7a268e73b5a6992f9795520a3722f Mon Sep 17 00:00:00 2001
From: tjann <18621425+tjann@users.noreply.github.com>
Date: Tue, 25 Aug 2020 18:52:59 -0700
Subject: [PATCH 35/35] Released pandas.

---
 .vscode/settings.json           | 3 +++
 datacommons_pandas/CHANGELOG.md | 6 +++---
 datacommons_pandas/__init__.py  | 2 +-
 3 files changed, 7 insertions(+), 4 deletions(-)
 create mode 100644 .vscode/settings.json

diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 00000000..99b087e5
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+  "python.linting.enabled": true
+}
\ No newline at end of file
diff --git a/datacommons_pandas/CHANGELOG.md b/datacommons_pandas/CHANGELOG.md
index 77ea2b0a..aae73af0 100644
--- a/datacommons_pandas/CHANGELOG.md
+++ b/datacommons_pandas/CHANGELOG.md
@@ -2,7 +2,7 @@
 
 ## 0.0.1
 
-**Date** - 08/24/2020
+**Date** - 08/25/2020
 
 **Release Tag** - [pd.0.0.1](https://github.com/datacommonsorg/api-python/releases/tag/pd0.0.1)
 
@@ -12,10 +12,10 @@ Added pandas wrapper functions.
 
 -   `build_time_series` constructs a pd.Series for a given StatisticalVariable and Place, where the time series are indexed by date.
 -   `build_time_series_dataframe` constructs a pd.DataFrame for a given StatisticalVariable and a set of Places. The DataFrame will have Places as the index and dates as the columns.
--   `build_covariate_dataframe` constructs a covariate pd.DataFrame for a set of StatisticalVariables and a set of Places. The DataFrame will have  Places as index and StatisticalVariables as the columns. The values are the most recent values for the chosen StatVarObservation options.
+-   `build_multivariate_dataframe` constructs a pd.DataFrame for a set of StatisticalVariables and a set of Places. The DataFrame will have Places as index and StatisticalVariables as the columns. The values are the most recent values for the chosen StatVarObservation options.
 
 For multi-place functions, when a StatisticalVariable has multiple StatVarObservation options,
-Data Commons chooses a set of StatVarObservation options that covers the most geos. This
+Data Commons chooses a set of StatVarObservation options that covers the most places. This
 ensures that the data fetched for a StatisticalVariable is comparable across places.
 When there is a tie, we select the StatVarObservation options set with the latest date
 data is available for any place.
diff --git a/datacommons_pandas/__init__.py b/datacommons_pandas/__init__.py
index c19dca9a..353c395d 100644
--- a/datacommons_pandas/__init__.py
+++ b/datacommons_pandas/__init__.py
@@ -1,4 +1,4 @@
-# Copyright 2017 Google Inc.
+# Copyright 2020 Google Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.