Skip to content

Commit

Permalink
link to new notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
JoeGermuska committed May 9, 2015
1 parent 3c7321f commit 9b9ad38
Show file tree
Hide file tree
Showing 3 changed files with 384 additions and 0 deletions.
312 changes: 312 additions & 0 deletions Basic Census Reporter API with Pandas.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,312 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Basic Census Reporter API with Pandas"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from censusreporter_api import *"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df = get_dataframe(column_names=True,level=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Top 5 States by Total Population"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>Total</th>\n",
" <th>Male</th>\n",
" <th>Female</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>04000US06</th>\n",
" <td>California</td>\n",
" <td>38332521</td>\n",
" <td>19072246</td>\n",
" <td>19260275</td>\n",
" </tr>\n",
" <tr>\n",
" <th>04000US48</th>\n",
" <td>Texas</td>\n",
" <td>26448193</td>\n",
" <td>13145494</td>\n",
" <td>13302699</td>\n",
" </tr>\n",
" <tr>\n",
" <th>04000US36</th>\n",
" <td>New York</td>\n",
" <td>19651127</td>\n",
" <td>9536179</td>\n",
" <td>10114948</td>\n",
" </tr>\n",
" <tr>\n",
" <th>04000US12</th>\n",
" <td>Florida</td>\n",
" <td>19552860</td>\n",
" <td>9565609</td>\n",
" <td>9987251</td>\n",
" </tr>\n",
" <tr>\n",
" <th>04000US17</th>\n",
" <td>Illinois</td>\n",
" <td>12882135</td>\n",
" <td>6326778</td>\n",
" <td>6555357</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name Total Male Female\n",
"04000US06 California 38332521 19072246 19260275\n",
"04000US48 Texas 26448193 13145494 13302699\n",
"04000US36 New York 19651127 9536179 10114948\n",
"04000US12 Florida 19552860 9565609 9987251\n",
"04000US17 Illinois 12882135 6326778 6555357"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sort('Total', ascending=False).head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Top 5 States by Female Population"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>Total</th>\n",
" <th>Male</th>\n",
" <th>Female</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>04000US06</th>\n",
" <td>California</td>\n",
" <td>38332521</td>\n",
" <td>19072246</td>\n",
" <td>19260275</td>\n",
" </tr>\n",
" <tr>\n",
" <th>04000US48</th>\n",
" <td>Texas</td>\n",
" <td>26448193</td>\n",
" <td>13145494</td>\n",
" <td>13302699</td>\n",
" </tr>\n",
" <tr>\n",
" <th>04000US36</th>\n",
" <td>New York</td>\n",
" <td>19651127</td>\n",
" <td>9536179</td>\n",
" <td>10114948</td>\n",
" </tr>\n",
" <tr>\n",
" <th>04000US12</th>\n",
" <td>Florida</td>\n",
" <td>19552860</td>\n",
" <td>9565609</td>\n",
" <td>9987251</td>\n",
" </tr>\n",
" <tr>\n",
" <th>04000US17</th>\n",
" <td>Illinois</td>\n",
" <td>12882135</td>\n",
" <td>6326778</td>\n",
" <td>6555357</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name Total Male Female\n",
"04000US06 California 38332521 19072246 19260275\n",
"04000US48 Texas 26448193 13145494 13302699\n",
"04000US36 New York 19651127 9536179 10114948\n",
"04000US12 Florida 19552860 9565609 9987251\n",
"04000US17 Illinois 12882135 6326778 6555357"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sort('Female',ascending=False).head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Top 5 States by Male Population"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>Total</th>\n",
" <th>Male</th>\n",
" <th>Female</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>04000US06</th>\n",
" <td>California</td>\n",
" <td>38332521</td>\n",
" <td>19072246</td>\n",
" <td>19260275</td>\n",
" </tr>\n",
" <tr>\n",
" <th>04000US48</th>\n",
" <td>Texas</td>\n",
" <td>26448193</td>\n",
" <td>13145494</td>\n",
" <td>13302699</td>\n",
" </tr>\n",
" <tr>\n",
" <th>04000US12</th>\n",
" <td>Florida</td>\n",
" <td>19552860</td>\n",
" <td>9565609</td>\n",
" <td>9987251</td>\n",
" </tr>\n",
" <tr>\n",
" <th>04000US36</th>\n",
" <td>New York</td>\n",
" <td>19651127</td>\n",
" <td>9536179</td>\n",
" <td>10114948</td>\n",
" </tr>\n",
" <tr>\n",
" <th>04000US17</th>\n",
" <td>Illinois</td>\n",
" <td>12882135</td>\n",
" <td>6326778</td>\n",
" <td>6555357</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name Total Male Female\n",
"04000US06 California 38332521 19072246 19260275\n",
"04000US48 Texas 26448193 13145494 13302699\n",
"04000US12 Florida 19552860 9565609 9987251\n",
"04000US36 New York 19651127 9536179 10114948\n",
"04000US17 Illinois 12882135 6326778 6555357"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sort('Male',ascending=False).head(5)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ What that looks like remains to be seen! Join in if you want to help figure it o

# index

* [Basic Census Reporter API with Pandas](Basic%20Census%20Reporter%20API%20with%20Pandas.ipynb)
* [Looking at Black/White income gap in cities / FiveThirtyEight article](538_race_income_gap.ipynb)


Expand Down
71 changes: 71 additions & 0 deletions censusreporter_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# set up some utility methods. If we ever make a python API wrapper, these belong there.
import requests
import pandas as pd

# URL template for the Census Reporter "data/show" endpoint; filled in via
# str.format with the keyword fields `release`, `table_ids` and `geoids`.
API_URL = (
    "http://api.censusreporter.org/1.0/data/show/{release}"
    "?table_ids={table_ids}&geo_ids={geoids}"
)
def _clean_list_arg(arg, default):
    """Normalize an argument that may be None, a single string, or a list.

    Args:
        arg: the caller-supplied value; ``None`` means "use ``default``".
        default: value substituted when ``arg`` is ``None``.

    Returns:
        A one-element list when the (possibly defaulted) value is a string;
        otherwise the value itself, unchanged.
    """
    # `basestring` only exists on Python 2 (where it covers str and unicode);
    # fall back to `str` so the helper also works on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if arg is None:
        arg = default
    if isinstance(arg, string_types):
        arg = [arg]
    return arg

def json_data(tables=None, geoids=None, release='latest'):
    """Fetch data from the Census Reporter API and return the decoded JSON.

    Args:
        tables: a table id or list of table ids; defaults to 'B01001'.
            Ids are upper-cased before being sent.
        geoids: a geoid or list of geoids; defaults to '040|01000US'.
        release: release name placed in the URL path; defaults to 'latest'.

    Returns:
        Whatever ``response.json()`` yields for the GET request.
    """
    geo_list = _clean_list_arg(geoids, '040|01000US')
    table_list = _clean_list_arg(tables, 'B01001')

    table_param = ','.join(table_list).upper()
    geo_param = ','.join(geo_list)
    request_url = API_URL.format(release=release,
                                 table_ids=table_param,
                                 geoids=geo_param)

    return requests.get(request_url).json()

def _prep_data_for_pandas(json_data, include_moe=False):
    """Flatten a Census Reporter API response for pandas.DataFrame.from_dict.

    Args:
        json_data: decoded API response; its 'data' entry maps
            geoid -> table -> kind ('estimate' / 'error') -> {column: value}.
        include_moe: when true, 'error' values are also included, keyed by
            the column id with "_moe" appended.

    Returns:
        A dict mapping each geoid to one flat {column: value} dict merged
        across all of that geoid's tables.
    """
    flattened = {}
    for geo, table_map in json_data['data'].items():
        row = {}
        for _table_id, by_kind in table_map.items():
            for kind, cols in by_kind.items():
                if kind == 'estimate':
                    row.update(cols)
                    continue
                if include_moe and kind == 'error':
                    row.update({key + "_moe": val for key, val in cols.items()})
        flattened[geo] = row
    return flattened

def _prep_headers_for_pandas(json_data, separator=":", level=None):
    """Build readable column headers from the table metadata of an API response.

    Columns are visited in sorted column-id order within each table. A column
    at indent 0 keeps its own name; a deeper column is named by joining the
    labels of its ancestors at indents 1..indent-1 and its own label with
    ``separator`` (the indent-0 label is not included). Leading and trailing
    ``separator`` characters are stripped from every label.

    Args:
        json_data: decoded API response; reads
            json_data['tables'][table]['columns'][column] for the keys
            'indent' and 'name'.
        separator: string used to join nested labels and stripped from them.
        level: when not None, only columns whose indent is <= level are
            included in the result.

    Returns:
        A dict mapping column id -> header string.
    """
    headers = {}
    for table in json_data['tables']:
        columns = json_data['tables'][table]['columns']
        # stack[i] holds the label of the most recent column seen at indent i
        # along the current branch. It grows on demand, so there is no fixed
        # limit on nesting depth.
        stack = []
        for column in sorted(columns):
            col_md = columns[column]
            indent = col_md['indent']
            name = col_md['name'].strip(separator)

            # Drop labels at this depth and deeper: they belong to a previous
            # branch and must not leak into this column's ancestry.
            del stack[indent:]
            # Pad with None if one or more indent levels were skipped.
            stack.extend([None] * (indent - len(stack)))
            stack.append(name)

            if indent > 0:
                parts = [label for label in stack[1:indent + 1]
                         if label is not None]
                name = separator.join(parts)
            if level is None or indent <= level:
                headers[column] = name
    return headers

def get_dataframe(tables=None, geoids=None, release='latest',level=None,place_names=True,column_names=True):
    """Fetch Census Reporter data and return it as a pandas DataFrame.

    The frame has one row per geoid and one column per estimate column,
    ordered by sorted column id.

    Args:
        tables: table id or list of table ids, passed to ``json_data``.
        geoids: geoid or list of geoids, passed to ``json_data``.
        release: release name, passed to ``json_data``.
        level: when not None, keep only columns whose metadata indent is
            <= level.
        place_names: when true, insert a leading 'name' column taken from the
            'name' field of the response's 'geography' entry.
        column_names: when true, rename column ids to the readable headers
            built by ``_prep_headers_for_pandas``.

    Returns:
        pandas.DataFrame indexed by geoid.
    """
    response = json_data(tables, geoids, release)
    df = pd.DataFrame.from_dict(_prep_data_for_pandas(response), orient='index')
    # reindex(columns=...) replaces DataFrame.reindex_axis, which was removed
    # in pandas 1.0; it is also available on older pandas versions.
    df = df.reindex(columns=sorted(df.columns))
    if column_names or level is not None:
        headers = _prep_headers_for_pandas(response, level=level)
        if level is not None:
            # Label-based selection replaces DataFrame.select (removed in
            # pandas 1.0); `headers` only contains columns within `level`.
            kept = [column for column in df.columns if column in headers]
            df = df.loc[:, kept]
        if column_names:
            df = df.rename(columns=headers)
    if place_names:
        name_frame = pd.DataFrame.from_dict(response['geography'], orient='index')
        # Explicit item access avoids relying on attribute-style column lookup.
        df.insert(0, 'name', name_frame['name'])
    return df

0 comments on commit 9b9ad38

Please sign in to comment.