-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
3c7321f
commit 9b9ad38
Showing
3 changed files
with
384 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,312 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Basic Census Reporter API with Pandas" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"from censusreporter_api import *" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"df = get_dataframe(column_names=True,level=1)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Top 5 States by Total Population" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | ||
"<table border=\"1\" class=\"dataframe\">\n", | ||
" <thead>\n", | ||
" <tr style=\"text-align: right;\">\n", | ||
" <th></th>\n", | ||
" <th>name</th>\n", | ||
" <th>Total</th>\n", | ||
" <th>Male</th>\n", | ||
" <th>Female</th>\n", | ||
" </tr>\n", | ||
" </thead>\n", | ||
" <tbody>\n", | ||
" <tr>\n", | ||
" <th>04000US06</th>\n", | ||
" <td>California</td>\n", | ||
" <td>38332521</td>\n", | ||
" <td>19072246</td>\n", | ||
" <td>19260275</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>04000US48</th>\n", | ||
" <td>Texas</td>\n", | ||
" <td>26448193</td>\n", | ||
" <td>13145494</td>\n", | ||
" <td>13302699</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>04000US36</th>\n", | ||
" <td>New York</td>\n", | ||
" <td>19651127</td>\n", | ||
" <td>9536179</td>\n", | ||
" <td>10114948</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>04000US12</th>\n", | ||
" <td>Florida</td>\n", | ||
" <td>19552860</td>\n", | ||
" <td>9565609</td>\n", | ||
" <td>9987251</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>04000US17</th>\n", | ||
" <td>Illinois</td>\n", | ||
" <td>12882135</td>\n", | ||
" <td>6326778</td>\n", | ||
" <td>6555357</td>\n", | ||
" </tr>\n", | ||
" </tbody>\n", | ||
"</table>\n", | ||
"</div>" | ||
], | ||
"text/plain": [ | ||
" name Total Male Female\n", | ||
"04000US06 California 38332521 19072246 19260275\n", | ||
"04000US48 Texas 26448193 13145494 13302699\n", | ||
"04000US36 New York 19651127 9536179 10114948\n", | ||
"04000US12 Florida 19552860 9565609 9987251\n", | ||
"04000US17 Illinois 12882135 6326778 6555357" | ||
] | ||
}, | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"df.sort('Total', ascending=False).head(5)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Top 5 States by Female Population" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | ||
"<table border=\"1\" class=\"dataframe\">\n", | ||
" <thead>\n", | ||
" <tr style=\"text-align: right;\">\n", | ||
" <th></th>\n", | ||
" <th>name</th>\n", | ||
" <th>Total</th>\n", | ||
" <th>Male</th>\n", | ||
" <th>Female</th>\n", | ||
" </tr>\n", | ||
" </thead>\n", | ||
" <tbody>\n", | ||
" <tr>\n", | ||
" <th>04000US06</th>\n", | ||
" <td>California</td>\n", | ||
" <td>38332521</td>\n", | ||
" <td>19072246</td>\n", | ||
" <td>19260275</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>04000US48</th>\n", | ||
" <td>Texas</td>\n", | ||
" <td>26448193</td>\n", | ||
" <td>13145494</td>\n", | ||
" <td>13302699</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>04000US36</th>\n", | ||
" <td>New York</td>\n", | ||
" <td>19651127</td>\n", | ||
" <td>9536179</td>\n", | ||
" <td>10114948</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>04000US12</th>\n", | ||
" <td>Florida</td>\n", | ||
" <td>19552860</td>\n", | ||
" <td>9565609</td>\n", | ||
" <td>9987251</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>04000US17</th>\n", | ||
" <td>Illinois</td>\n", | ||
" <td>12882135</td>\n", | ||
" <td>6326778</td>\n", | ||
" <td>6555357</td>\n", | ||
" </tr>\n", | ||
" </tbody>\n", | ||
"</table>\n", | ||
"</div>" | ||
], | ||
"text/plain": [ | ||
" name Total Male Female\n", | ||
"04000US06 California 38332521 19072246 19260275\n", | ||
"04000US48 Texas 26448193 13145494 13302699\n", | ||
"04000US36 New York 19651127 9536179 10114948\n", | ||
"04000US12 Florida 19552860 9565609 9987251\n", | ||
"04000US17 Illinois 12882135 6326778 6555357" | ||
] | ||
}, | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"df.sort('Female',ascending=False).head(5)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Top 5 States by Male Population" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | ||
"<table border=\"1\" class=\"dataframe\">\n", | ||
" <thead>\n", | ||
" <tr style=\"text-align: right;\">\n", | ||
" <th></th>\n", | ||
" <th>name</th>\n", | ||
" <th>Total</th>\n", | ||
" <th>Male</th>\n", | ||
" <th>Female</th>\n", | ||
" </tr>\n", | ||
" </thead>\n", | ||
" <tbody>\n", | ||
" <tr>\n", | ||
" <th>04000US06</th>\n", | ||
" <td>California</td>\n", | ||
" <td>38332521</td>\n", | ||
" <td>19072246</td>\n", | ||
" <td>19260275</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>04000US48</th>\n", | ||
" <td>Texas</td>\n", | ||
" <td>26448193</td>\n", | ||
" <td>13145494</td>\n", | ||
" <td>13302699</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>04000US12</th>\n", | ||
" <td>Florida</td>\n", | ||
" <td>19552860</td>\n", | ||
" <td>9565609</td>\n", | ||
" <td>9987251</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>04000US36</th>\n", | ||
" <td>New York</td>\n", | ||
" <td>19651127</td>\n", | ||
" <td>9536179</td>\n", | ||
" <td>10114948</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>04000US17</th>\n", | ||
" <td>Illinois</td>\n", | ||
" <td>12882135</td>\n", | ||
" <td>6326778</td>\n", | ||
" <td>6555357</td>\n", | ||
" </tr>\n", | ||
" </tbody>\n", | ||
"</table>\n", | ||
"</div>" | ||
], | ||
"text/plain": [ | ||
" name Total Male Female\n", | ||
"04000US06 California 38332521 19072246 19260275\n", | ||
"04000US48 Texas 26448193 13145494 13302699\n", | ||
"04000US12 Florida 19552860 9565609 9987251\n", | ||
"04000US36 New York 19651127 9536179 10114948\n", | ||
"04000US17 Illinois 12882135 6326778 6555357" | ||
] | ||
}, | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"df.sort('Male',ascending=False).head(5)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 2", | ||
"language": "python", | ||
"name": "python2" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 2 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython2", | ||
"version": "2.7.9" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 0 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
# Set up some utility functions. If we ever make a Python API wrapper, these belong there. | ||
import requests | ||
import pandas as pd | ||
|
||
# URL template for the Census Reporter "data/show" endpoint. The placeholders
# {release}, {table_ids} and {geoids} are filled in with str.format.
API_URL = (
    "http://api.censusreporter.org/1.0/data/show/{release}"
    "?table_ids={table_ids}&geo_ids={geoids}"
)
def _clean_list_arg(arg,default): | ||
if arg is None: | ||
arg = default | ||
if isinstance(arg,basestring): | ||
arg = [arg] | ||
return arg | ||
|
||
def json_data(tables=None, geoids=None, release='latest', timeout=30):
    """Fetch the decoded JSON body for a Census Reporter ``data/show`` request.

    Args:
        tables: A table ID or a list of table IDs. Defaults to ``'B01001'``.
            The joined IDs are upper-cased before being placed in the URL.
        geoids: A geo ID or a list of geo IDs. Defaults to ``'040|01000US'``.
        release: Release name substituted into the URL path.
        timeout: Seconds to wait for the server, passed straight to
            ``requests.get``. Pass ``None`` to wait indefinitely (the
            behaviour before this parameter existed).

    Returns:
        Whatever ``response.json()`` decodes from the response body. The HTTP
        status code is not checked here.
    """
    geoids = _clean_list_arg(geoids, '040|01000US')
    tables = _clean_list_arg(tables, 'B01001')

    url = API_URL.format(
        table_ids=','.join(tables).upper(),
        geoids=','.join(geoids),
        release=release,
    )

    # Without a timeout, requests can block forever on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    return response.json()
|
||
def _prep_data_for_pandas(json_data,include_moe=False): | ||
"""Given a dict of dicts as they come from a Census Reporter API call, set it up to be amenable to pandas.DataFrame.from_dict""" | ||
result = {} | ||
for geoid, tables in json_data['data'].items(): | ||
flat = {} | ||
for table,values in tables.items(): | ||
for kind, columns in values.items(): | ||
if kind == 'estimate': | ||
flat.update(columns) | ||
elif kind == 'error' and include_moe: | ||
renamed = dict((k+"_moe",v) for k,v in columns.items()) | ||
flat.update(renamed) | ||
result[geoid] = flat | ||
return result | ||
|
||
def _prep_headers_for_pandas(json_data,separator=":", level=None): | ||
headers = {} | ||
for table in json_data['tables']: | ||
stack = [ None ] * 10 # pretty sure no columns are nested deeper than this. | ||
for column in sorted(json_data['tables'][table]['columns']): | ||
col_md = json_data['tables'][table]['columns'][column] | ||
indent = col_md['indent'] | ||
name = col_md['name'].strip(separator) | ||
stack[indent] = name | ||
parts = [] | ||
if indent > 0: | ||
for i in range(1,indent+1): | ||
if stack[i] is not None: | ||
parts.append(stack[i].strip(separator)) | ||
name = separator.join(parts) | ||
if level is None or indent <= level: | ||
headers[column] = name | ||
return headers | ||
|
||
def get_dataframe(tables=None, geoids=None, release='latest', level=None, place_names=True, column_names=True):
    """Fetch Census Reporter data and return it as a pandas DataFrame.

    Args:
        tables: Table ID(s), passed through to ``json_data``.
        geoids: Geo ID(s), passed through to ``json_data``.
        release: Release name, passed through to ``json_data``.
        level: When not None, only columns whose metadata indent is at most
            ``level`` are kept.
        place_names: When true, a ``'name'`` column taken from the response's
            ``'geography'`` section is inserted as the first column.
        column_names: When true, column IDs are renamed to the headers built
            by ``_prep_headers_for_pandas``.

    Returns:
        A DataFrame indexed by geoid, with columns ordered by sorted column ID
        (before any renaming).
    """
    response = json_data(tables, geoids, release)
    df = pd.DataFrame.from_dict(_prep_data_for_pandas(response), orient='index')
    # DataFrame.reindex_axis was removed in pandas 1.0; reindex(columns=...)
    # is the equivalent call and also exists in older releases.
    df = df.reindex(columns=sorted(df.columns))
    if column_names or level is not None:
        headers = _prep_headers_for_pandas(response, level=level)
        if level is not None:
            # DataFrame.select was removed in pandas 1.0; filter the column
            # labels explicitly instead.
            keep = [col for col in df.columns if col in headers]
            df = df.loc[:, keep]
        if column_names:
            df = df.rename(columns=headers)
    if place_names:
        name_frame = pd.DataFrame.from_dict(response['geography'], orient='index')
        # Item access avoids relying on attribute lookup resolving to the
        # 'name' column.
        df.insert(0, 'name', name_frame['name'])
    return df