diff --git a/README.md b/README.md
index 5a7d2c1..a8f1660 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,56 @@
-# learning-by-doing
-
-a set of tasks for people to learn basic software and data science skills by completing the tasks
-
-
-## License
-
-Except as otherwise noted, the tutorial content of this `astropgh/learning-by-doing` is licensed under the [Creative Commons Attribution-ShareAlike 4.0 International License](http://creativecommons.org/licenses/by-sa/4.0/) [![CC BY-SA 4.0](https://i.creativecommons.org/l/by-sa/4.0/80x15.png)](http://creativecommons.org/licenses/by-sa/4.0/), and the code samples are licensed under the [MIT License](https://opensource.org/licenses/MIT).
+# Task 4: [database] Preparing for data scraping: design a data model for top baby names
+## Background
+Before we start to scrape the top baby names from the webpage, we need to design
+a data model that we will use to store the data.
+The term "data model" has different meanings in different contexts.
+We can ask what kind of object the data will be stored in.
+A python list? A python dictionary? A pandas data frame?
+For a given type, we can further ask how the data is stored.
+For example, if we store the data in a pandas data frame, we can ask what
+are the columns and rows.
+Let's look at some examples.
+The original webpage stores the names as a table, with columns being
+`year`, `female_rank1`, `female_rank2`, `male_rank1`, `male_rank2`..., and
+each row corresponds to one single year.
+A more extreme example would be storing the names as a sequence (say a python list),
+the content of the sequence will be the names, while the indices of the sequence encode
+year, ranking, and gender altogether. A possible way to encode the information is
+```python
+year = 2017 - index // 10
+rank = index % 5 + 1
+gender = 'female' if index % 10 < 5 else 'male'
+```
+While this data model preserves all the information, it is unlikely that this
+model will be very convenient when it comes to data exploration.
+Yet another totally different data model is to group the data by names.
+Let's say we'll store the data in a python dictionary. A possible way is:
+```python
+{
+ 'Emma':{
+ 'gender': 'female',
+ 'years_ranked_1': [2017, 2016, 2015, 2014, ...],
+ 'years_ranked_2': [2013, 2012, 2009, ...],
+ 'years_ranked_3': [...],
+ },
+ 'Noah':{
+ ...,
+ },
+ ...,
+}
+```
+Note that the form (object) in which the data is stored and how the data is structured
+are two different things. (*Food for thought: why? Can you give an example?*)
+Clearly, the choice of data model heavily depends on the questions that we would
+like to answer with the data.
+If the amount of data is very large, we will also need to consider the available
+computing resources like memory usage and I/O speed when designing the data model.
+For now, we don't yet need to worry about the limitation due to computing resources.
+## Task
+Try to come up with a data model that is good for answering each of the following questions.
+Think about the code you'll need to write to interact with the data model to answer
+these questions.
+1. In which years was Emma the most chosen name?
+2. Which name was the most chosen name for the longest run of consecutive years?
+3. How many unique male names were in the top 5 between the years 1980 and 2000?
+4. Are there more unique male names or more unique female names in the top 5?
+5. What is the distribution of the numbers of consecutive years that a male name remains the most chosen name?
diff --git a/data_structure.ipynb b/data_structure.ipynb
new file mode 100755
index 0000000..32792f2
--- /dev/null
+++ b/data_structure.ipynb
@@ -0,0 +1,1237 @@
+{
+ "metadata": {
+ "name": "",
+ "signature": "sha256:b99c50f5345e8fd1ce020369618bbc3cabda5bdfdff85e2a1cce6f7592d1a2cb"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "import pandas as pd"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 1
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "#\"\"\"\n",
+ "#get_top_names.py\n",
+ "#For astropgh/learning-by-doing: Task 3\n",
+ "#https://github.com/astropgh/learning-by-doing/tree/master/task-03\n",
+ "#\"\"\"\n",
+ "\n",
+ "def extract_data_lines(filename, start_text, end_text, include_start = False, include_end = False):\n",
+ " \"\"\"\n",
+ " open `filename`, and yield the lines between\n",
+ " the line that contains `start_text` and the line that contains `end_text`\n",
+ " \"\"\"\n",
+ "\n",
+ " # Needed to record the text in between\n",
+ " parsing = False\n",
+ " \n",
+ " # use `yield line` to return desired lines but keep the function going\n",
+ " with open(filename) as fh:\n",
+ " \n",
+ " for line in fh:\n",
+ " \n",
+ " ######################################################################\n",
+ " \n",
+ " if start_text in line:\n",
+ " \n",
+ " parsing = True\n",
+ " \n",
+ " if not include_start:\n",
+ " \n",
+ " continue\n",
+ " \n",
+ " ###################################################################### \n",
+ " \n",
+ " elif end_text in line:\n",
+ " \n",
+ " if include_end:\n",
+ " \n",
+ " #parsing = True\n",
+ " yield line\n",
+ " break\n",
+ " \n",
+ " else:\n",
+ " \n",
+ " parsing = False\n",
+ " \n",
+ " ######################################################################\n",
+ " \n",
+ " \n",
+ " if parsing: # Do stuff with the data\n",
+ " \n",
+ " yield line"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 2
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "Years = []\n",
+ "Names = []\n",
+ "\n",
+ "if __name__ == '__main__':\n",
+ " filename = 'top5names.html'\n",
+ " start_text = '
\n",
+ "\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 1918 | \n",
+ " 1919 | \n",
+ " 1920 | \n",
+ " 1921 | \n",
+ " 1922 | \n",
+ " 1923 | \n",
+ " 1924 | \n",
+ " 1925 | \n",
+ " 1926 | \n",
+ " 1927 | \n",
+ " ... | \n",
+ " 2008 | \n",
+ " 2009 | \n",
+ " 2010 | \n",
+ " 2011 | \n",
+ " 2012 | \n",
+ " 2013 | \n",
+ " 2014 | \n",
+ " 2015 | \n",
+ " 2016 | \n",
+ " 2017 | \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " F1 | \n",
+ " Mary | \n",
+ " Mary | \n",
+ " Mary | \n",
+ " Mary | \n",
+ " Mary | \n",
+ " Mary | \n",
+ " Mary | \n",
+ " Mary | \n",
+ " Mary | \n",
+ " Mary | \n",
+ " ... | \n",
+ " Emma | \n",
+ " Isabella | \n",
+ " Isabella | \n",
+ " Sophia | \n",
+ " Sophia | \n",
+ " Sophia | \n",
+ " Emma | \n",
+ " Emma | \n",
+ " Emma | \n",
+ " Emma | \n",
+ " \n",
+ " \n",
+ " F2 | \n",
+ " Helen | \n",
+ " Helen | \n",
+ " Dorothy | \n",
+ " Dorothy | \n",
+ " Dorothy | \n",
+ " Dorothy | \n",
+ " Dorothy | \n",
+ " Dorothy | \n",
+ " Dorothy | \n",
+ " Dorothy | \n",
+ " ... | \n",
+ " Isabella | \n",
+ " Emma | \n",
+ " Sophia | \n",
+ " Isabella | \n",
+ " Emma | \n",
+ " Emma | \n",
+ " Olivia | \n",
+ " Olivia | \n",
+ " Olivia | \n",
+ " Olivia | \n",
+ " \n",
+ " \n",
+ " F3 | \n",
+ " Dorothy | \n",
+ " Dorothy | \n",
+ " Helen | \n",
+ " Helen | \n",
+ " Helen | \n",
+ " Helen | \n",
+ " Helen | \n",
+ " Betty | \n",
+ " Betty | \n",
+ " Betty | \n",
+ " ... | \n",
+ " Emily | \n",
+ " Olivia | \n",
+ " Emma | \n",
+ " Emma | \n",
+ " Isabella | \n",
+ " Olivia | \n",
+ " Sophia | \n",
+ " Sophia | \n",
+ " Ava | \n",
+ " Ava | \n",
+ " \n",
+ " \n",
+ " F4 | \n",
+ " Margaret | \n",
+ " Margaret | \n",
+ " Margaret | \n",
+ " Margaret | \n",
+ " Margaret | \n",
+ " Margaret | \n",
+ " Betty | \n",
+ " Helen | \n",
+ " Helen | \n",
+ " Helen | \n",
+ " ... | \n",
+ " Olivia | \n",
+ " Sophia | \n",
+ " Olivia | \n",
+ " Olivia | \n",
+ " Olivia | \n",
+ " Isabella | \n",
+ " Isabella | \n",
+ " Ava | \n",
+ " Sophia | \n",
+ " Isabella | \n",
+ " \n",
+ " \n",
+ " F5 | \n",
+ " Ruth | \n",
+ " Ruth | \n",
+ " Ruth | \n",
+ " Ruth | \n",
+ " Ruth | \n",
+ " Betty | \n",
+ " Margaret | \n",
+ " Margaret | \n",
+ " Margaret | \n",
+ " Margaret | \n",
+ " ... | \n",
+ " Ava | \n",
+ " Ava | \n",
+ " Ava | \n",
+ " Ava | \n",
+ " Ava | \n",
+ " Ava | \n",
+ " Ava | \n",
+ " Isabella | \n",
+ " Isabella | \n",
+ " Sophia | \n",
+ " \n",
+ " \n",
+ " M1 | \n",
+ " John | \n",
+ " John | \n",
+ " John | \n",
+ " John | \n",
+ " John | \n",
+ " John | \n",
+ " Robert | \n",
+ " Robert | \n",
+ " Robert | \n",
+ " Robert | \n",
+ " ... | \n",
+ " Jacob | \n",
+ " Jacob | \n",
+ " Jacob | \n",
+ " Jacob | \n",
+ " Jacob | \n",
+ " Noah | \n",
+ " Noah | \n",
+ " Noah | \n",
+ " Noah | \n",
+ " Liam | \n",
+ " \n",
+ " \n",
+ " M2 | \n",
+ " William | \n",
+ " William | \n",
+ " William | \n",
+ " Robert | \n",
+ " Robert | \n",
+ " Robert | \n",
+ " John | \n",
+ " John | \n",
+ " John | \n",
+ " John | \n",
+ " ... | \n",
+ " Michael | \n",
+ " Ethan | \n",
+ " Ethan | \n",
+ " Mason | \n",
+ " Mason | \n",
+ " Jacob | \n",
+ " Liam | \n",
+ " Liam | \n",
+ " Liam | \n",
+ " Noah | \n",
+ " \n",
+ " \n",
+ " M3 | \n",
+ " James | \n",
+ " James | \n",
+ " Robert | \n",
+ " William | \n",
+ " William | \n",
+ " William | \n",
+ " William | \n",
+ " William | \n",
+ " James | \n",
+ " James | \n",
+ " ... | \n",
+ " Ethan | \n",
+ " Michael | \n",
+ " Michael | \n",
+ " William | \n",
+ " Ethan | \n",
+ " Liam | \n",
+ " Mason | \n",
+ " Mason | \n",
+ " William | \n",
+ " William | \n",
+ " \n",
+ " \n",
+ " M4 | \n",
+ " Robert | \n",
+ " Robert | \n",
+ " James | \n",
+ " James | \n",
+ " James | \n",
+ " James | \n",
+ " James | \n",
+ " James | \n",
+ " William | \n",
+ " William | \n",
+ " ... | \n",
+ " Joshua | \n",
+ " Alexander | \n",
+ " Jayden | \n",
+ " Jayden | \n",
+ " Noah | \n",
+ " Mason | \n",
+ " Jacob | \n",
+ " Jacob | \n",
+ " Mason | \n",
+ " James | \n",
+ " \n",
+ " \n",
+ " M5 | \n",
+ " Charles | \n",
+ " Charles | \n",
+ " Charles | \n",
+ " Charles | \n",
+ " Charles | \n",
+ " Charles | \n",
+ " Charles | \n",
+ " Charles | \n",
+ " Charles | \n",
+ " Charles | \n",
+ " ... | \n",
+ " Daniel | \n",
+ " William | \n",
+ " William | \n",
+ " Noah | \n",
+ " William | \n",
+ " William | \n",
+ " William | \n",
+ " William | \n",
+ " James | \n",
+ " Logan | \n",
+ " \n",
+ " \n",
+ " \n",
+ "10 rows \u00d7 100 columns \n",
+ ""
+ ],
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 9,
+ "text": [
+ " 1918 1919 1920 1921 1922 1923 1924 \\\n",
+ "F1 Mary Mary Mary Mary Mary Mary Mary \n",
+ "F2 Helen Helen Dorothy Dorothy Dorothy Dorothy Dorothy \n",
+ "F3 Dorothy Dorothy Helen Helen Helen Helen Helen \n",
+ "F4 Margaret Margaret Margaret Margaret Margaret Margaret Betty \n",
+ "F5 Ruth Ruth Ruth Ruth Ruth Betty Margaret \n",
+ "M1 John John John John John John Robert \n",
+ "M2 William William William Robert Robert Robert John \n",
+ "M3 James James Robert William William William William \n",
+ "M4 Robert Robert James James James James James \n",
+ "M5 Charles Charles Charles Charles Charles Charles Charles \n",
+ "\n",
+ " 1925 1926 1927 ... 2008 2009 2010 \\\n",
+ "F1 Mary Mary Mary ... Emma Isabella Isabella \n",
+ "F2 Dorothy Dorothy Dorothy ... Isabella Emma Sophia \n",
+ "F3 Betty Betty Betty ... Emily Olivia Emma \n",
+ "F4 Helen Helen Helen ... Olivia Sophia Olivia \n",
+ "F5 Margaret Margaret Margaret ... Ava Ava Ava \n",
+ "M1 Robert Robert Robert ... Jacob Jacob Jacob \n",
+ "M2 John John John ... Michael Ethan Ethan \n",
+ "M3 William James James ... Ethan Michael Michael \n",
+ "M4 James William William ... Joshua Alexander Jayden \n",
+ "M5 Charles Charles Charles ... Daniel William William \n",
+ "\n",
+ " 2011 2012 2013 2014 2015 2016 2017 \n",
+ "F1 Sophia Sophia Sophia Emma Emma Emma Emma \n",
+ "F2 Isabella Emma Emma Olivia Olivia Olivia Olivia \n",
+ "F3 Emma Isabella Olivia Sophia Sophia Ava Ava \n",
+ "F4 Olivia Olivia Isabella Isabella Ava Sophia Isabella \n",
+ "F5 Ava Ava Ava Ava Isabella Isabella Sophia \n",
+ "M1 Jacob Jacob Noah Noah Noah Noah Liam \n",
+ "M2 Mason Mason Jacob Liam Liam Liam Noah \n",
+ "M3 William Ethan Liam Mason Mason William William \n",
+ "M4 Jayden Noah Mason Jacob Jacob Mason James \n",
+ "M5 Noah William William William William James Logan \n",
+ "\n",
+ "[10 rows x 100 columns]"
+ ]
+ }
+ ],
+ "prompt_number": 9
+ },
+ {
+ "cell_type": "heading",
+ "level": 1,
+ "metadata": {},
+ "source": [
+ "QUESTION 1: In which years was Emma the most chosen name?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "dic_names['1918']"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 10,
+ "text": [
+ "array(['Mary', 'Helen', 'Dorothy', 'Margaret', 'Ruth', 'John', 'William',\n",
+ " 'James', 'Robert', 'Charles'], \n",
+ " dtype='|S11')"
+ ]
+ }
+ ],
+ "prompt_number": 10
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "df_names.get_value('F1','1918')"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 11,
+ "text": [
+ "'Mary'"
+ ]
+ }
+ ],
+ "prompt_number": 11
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "df_names.apply(lambda row: row.astype(str).str.contains('Emma').any(), axis=0)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 12,
+ "text": [
+ "1918 False\n",
+ "1919 False\n",
+ "1920 False\n",
+ "1921 False\n",
+ "1922 False\n",
+ "1923 False\n",
+ "1924 False\n",
+ "1925 False\n",
+ "1926 False\n",
+ "1927 False\n",
+ "1928 False\n",
+ "1929 False\n",
+ "1930 False\n",
+ "1931 False\n",
+ "1932 False\n",
+ "1933 False\n",
+ "1934 False\n",
+ "1935 False\n",
+ "1936 False\n",
+ "1937 False\n",
+ "1938 False\n",
+ "1939 False\n",
+ "1940 False\n",
+ "1941 False\n",
+ "1942 False\n",
+ "1943 False\n",
+ "1944 False\n",
+ "1945 False\n",
+ "1946 False\n",
+ "1947 False\n",
+ " ... \n",
+ "1988 False\n",
+ "1989 False\n",
+ "1990 False\n",
+ "1991 False\n",
+ "1992 False\n",
+ "1993 False\n",
+ "1994 False\n",
+ "1995 False\n",
+ "1996 False\n",
+ "1997 False\n",
+ "1998 False\n",
+ "1999 False\n",
+ "2000 False\n",
+ "2001 False\n",
+ "2002 True\n",
+ "2003 True\n",
+ "2004 True\n",
+ "2005 True\n",
+ "2006 True\n",
+ "2007 True\n",
+ "2008 True\n",
+ "2009 True\n",
+ "2010 True\n",
+ "2011 True\n",
+ "2012 True\n",
+ "2013 True\n",
+ "2014 True\n",
+ "2015 True\n",
+ "2016 True\n",
+ "2017 True\n",
+ "dtype: bool"
+ ]
+ }
+ ],
+ "prompt_number": 12
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "print df_names[df_names=='Emma'].index"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "Index([u'F1', u'F2', u'F3', u'F4', u'F5', u'M1', u'M2', u'M3', u'M4', u'M5'], dtype='object')\n"
+ ]
+ }
+ ],
+ "prompt_number": 13
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "ranking_Emma = []\n",
+ "year_Emma = []\n",
+ "\n",
+ "\n",
+ "#for row in range(df_names.shape[0]):\n",
+ "for row in list(df_names.index): # The column labels are strings, not integers\n",
+ " \n",
+ " #for col in range(df_names.shape[1]):\n",
+ " for col in list(df_names.columns.values): # The column labels are strings, not integers\n",
+ " \n",
+ " if df_names.get_value(row,col) == 'Emma':\n",
+ " \n",
+ " print(row, col)\n",
+ " #break\n",
+ " \n",
+ " ranking_Emma.append(int(row[-1]))\n",
+ " year_Emma.append(int(col))"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "('F1', '2008')\n",
+ "('F1', '2014')\n",
+ "('F1', '2015')\n",
+ "('F1', '2016')\n",
+ "('F1', '2017')\n",
+ "('F2', '2003')\n",
+ "('F2', '2004')\n",
+ "('F2', '2005')\n",
+ "('F2', '2006')\n",
+ "('F2', '2009')\n",
+ "('F2', '2012')\n",
+ "('F2', '2013')\n",
+ "('F3', '2007')\n",
+ "('F3', '2010')\n",
+ "('F3', '2011')\n",
+ "('F4', '2002')\n"
+ ]
+ }
+ ],
+ "prompt_number": 14
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "fig, ax = plt.subplots()\n",
+ "\n",
+ "ax.hist2d(year_Emma, ranking_Emma);\n",
+ "ax.set_yticks([1, 2, 3, 4]);\n",
+ "ax.tick_params(axis='both', which='major', pad=10, labelsize=12)\n",
+ "\n",
+ "\n",
+ "ax.set_title(r'$\\rm{Years \\, Emma \\, most \\, popular \\, female \\, name}$' + '\\n', fontsize=18);\n",
+ "\n",
+ "ax.set_xlabel(r'$\\rm{Year}$', fontsize=18, labelpad=5);\n",
+ "ax.set_ylabel(r'$\\rm{Ranking}$', fontsize=18, labelpad=10);"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "display_data",
+ "png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAFFCAYAAAAdAsFPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHFpJREFUeJzt3Xm4LHV95/H3927IjgtCBOEI6CDogAhmXOEKjoiSGEki\nqKCOC2KuW+KMKCDgRQVHHCUqgqAYNIiOcYRoHCNwMQ64xAX0gjIOHDCIT1QQWS4i937nj6ojbdPn\n/Ho9Vfec9+t5+jmna/nVt6ur+tO1dFVkJpIkzWVJ0wVIktrPsJAkFRkWkqQiw0KSVGRYSJKKDAtJ\nUpFhIUkqMiwkSUWGhSSpaFnTBUiaXxGxJbAr8BDg+5l5a8MlzTvnweAW9ZZFRJwWEVdFxIb6cXFE\n7NI1zOqIuDci7o6IL0bEI5uqdzYRcWhEXBQR6+rX8YWI+Hj9uDAivhER90XE55uuVa3wGOC/AV8F\nHjcfE4yI7evl8YSI+H5E7Dcf053DvM+DjV14bSiIiH8Ang/sl5nf6dH/EuCYzLxu3osbQER8AnhJ\nZi7t0e9JwN9m5h/Pf2ULQ0S8Efh8Zt7YdC2jioglwH3AAZn5tXmY3uXA/wZOBT4NfHA+pluoaV7n\nwcZuUW9ZdHgdcCdwVr0A/V5EHA5c0PagqCUQPXtkfgv44fyWs+CspJrHG73M3DDPk3wqcEVmbsjM\nv2zDh3MD82CjZlgAmXkzcAKwD7BqpntEbAO8IDPPaaq2UUXEMR1PL6/31WoAEbFZRLwWeB6zhLGK\nlrBAgnax8gD3/f4WOBJYHRGfzcxbgHcAb+8eMCKWAscB2wK/AB4PnJSZa+v+OwJvAH4NbAo8Eji2\nbpOIeGE9/i5U31YPAV4GHJ6Z36w/mDbUj+2B52Tmkwd9QXWdhwFnAmTm39Xdn0m1O+A/AEcAj6Va\nFvYFvgacRxWaG4D9gK9l5hmjjtvPvJnjtYw03bqNVfX4NwM7AD/MzLM6+s8234+gep8CeG9E3An8\nIDPfN0e9hwNvo3qPX0G1X3wpsDfwj5n54X5r67etiDgEOL1u5+WZ+YmIeBbVsv0YYGVmXj5HzSMt\nt7O0+VTglfXTYyPiZcAZmfm9udajBpezOdftWcaZrdZ9gMs73+s+5vGor3vg+vuWmT7qR/3m3gd8\nhmqz+aRZhjsPOLfj+V7ALcDm9fO/B67u6P8W4KquNp5Qv+EnUa343wcOrad7Rtewl/ZZ/3l1mx8H\nPgn8CFg/y7AProf9ErB13W1nYD3Vh8uD6m5Tdbc/GtO4xXkzx+sbZbqnd75ndbePAu+u/59zvtfT\n2QDsNMDy9Nh6nNd1dNsGuAl4e7+1DdjWpvVwR/Xo9oyuafxBt1GW2z7mRa/pn0fv9WiL+VrOesyD\nOWsaYtn8HbDdgPN4lNc9VP19Lc+jNrDQHsD76zfqcmBFj/571/2f0NX9BuCI+v8jgVM7+s2s6Nt3\ndJuqux3S1c6fAD8G9uf+ExD+tM/azwM2dDxfSvXNY64VuPODZknd7aU9uj11HOP2M28Kr3Hg6QK7\n1yvVPl1tPaHuvltpvne8X4OERc9xgLcCvwUe2k9tfbb1sK55dFTXcP2ExdDLbZ/vW+e0iuvRfCxn\nnXX1W9MQy+ZThqxr0OV8pPpLD3dDPdAZwOupNu/v7dF/Zf33kIh4Rkf3K4B1AJl5fkQ8PCJeDezY\nMcyKHu1d3/X8S8DhwGXAXRGxBjhl4FdR1bE+Ir7e2S0ijsnMMzs6TXcMvyEioFq4urs94AyrYcYd\ncN7MZtDpPptqF9JPu9r5Wd39PwNnM6b53ocrgeXA06g+fEu1/aSPtp4CXDRKUSMut4Mqrkcdpjtq\nnORyNkhNs+lV67KOboPU1autuV73OOqflWHxQDNnSKwv9L8gM3uuMPWCcBywKjPPjoidgeNnae/u\nruebA0cBJ1J9yz0IuCQinpaZ3+/zNfxeZr6to65d
aPikhgHnzbjMrEybU+3HnbFZ/Xc5A873iHh6\nZv7LkPXMfDCs77O2ftoa+cyeEZfbQRXXo1EMuZxNtKYR6urXROv3bKjB/XP9d+/OjvUZM/tGxA5U\nB5RXZ+bFde/lHcMdVmj/+VT7gP9vZp6TmYfX7R3UZ305R79DqI5jNGIM82ZYMwd1d+rqvmv99zLK\n833my8PM2VAr6V/3GVRPofqm93/6rK3ftmb8QXBExHbFAuf/vZlzPRql4RFey1cnVdOIdfVrovUb\nFg80szL2nDeZeQ31rqqu32S8meoMh63qNjo3+55HtV95M2CPrul0794J4E1Rb1/W7gG+3WftPU/t\njIjdqc6oubZ+/oDXOQ/d+p03vV/ckNPN6oeWZ3P/WTkzXgWcmZlXU57vP6N6f2c+xGfb8uzlRR21\nPRJ4DfC2zLytz9r6aqtjmB9RHQ+Z8WdUB1of3jHusO/NbMvtrDqm1bmbaK716PZZahzrctZjOVk7\nR02/7vM1jr2ufruNUn8//AV3h6h+Ab0P1Zt2K7AGeFdmfq/HsKuovtVNU+3Ouygzv173ezHVyn8Z\n1eb6VVT7zXen+mDYFPgb4IlUH95fycy/rsd9EfBoYEuq3RKbALdk5kfnqPvQenoHAQ8CLgZmPjw2\noTqT4knA3Zm5dX163gnAM+r6z6trXV13uwY4B1hLtck80+2sutvbBxz3WqoPvg+V5k1m9tzvHhEr\nR5lu3cYqqtNOb+f+awLN9CvO94h4HtUPOK8GvpiZa3rV2jH8FNW+/b+i+qD+HdV7/unM/GzXsLPW\nNkRb+1Lt2vgG1ZbmlcD/pFqmPwz8oGNeXgN8ODPPHGW5nWMe7A+cDDyd6n37DtVVBu7teN0PWI9G\nfb/7eC13dLS/FvhI3n8K8qzr9iyvsVTrsHUN/LqHqb9fhoU0IR0f8FOZeVNb2pKG4W4oaXLm3KXZ\nYFvSwFzwpAmI6tfOF1LtBrowIo5oQ1vSsNwNJUkqcstCklRkWEiSigwLSVKRYSFJKjIsJElFhoUk\nqciwkCQVGRaSpCLDQpJUZFhIkooMC0lSkWEhSSoyLCRJRYaFJKnIsJAkFRkWkqQiw0KSVNT6sIiI\nR0fEPRFxftO1SNJi1fqwAD4EfIvq/sOSpAa0Oiwi4nDgNuASIBouR5IWrdaGRURsBZwMvAmDQpIa\ntazpAuawGjgnM38WEQPvgjqBtzW+2+qUWNF0CZI0kMyTen45j8zGP1MfICL2Bj4JPCEzfxcRJwG7\nZuaRA7TRvhcmSS2XmT3Doq1bFvsDU8BNEQGwBbA0Ih6bmfv228jx+dbJVNenwbYs1gAHTKaQiVnD\nxlXzGjauesGa58MaNq56YXI1nzxrn7aGxdnABfX/AbyZKjxe01RBkrSYtTIsMnMdsG7meUTcCazL\nzF81V5UkLV6tDItumTn7ttGCMdV0AUOYarqAAU01XcAQppouYAhTTRcwoKmmCxjC1LxPsbWnzi4+\nU00XMISppgsY0FTTBQxhqukChjDVdAEDmmq6gCFMzfsUDQtJUpFhIUkqMiwkSUWGhSSpyLCQJBUZ\nFpKkIsNCklRkWEiSigwLSVKRYSFJKjIsJElFhoUkqciwkCQVGRaSpCLDQpJUtFHc/GhYg90DW5I0\nG7csJElFhoUkqciwkCQVGRaSpCLDQpJUZFhIkooMC0lSkWEhSSoyLCRJRYaFJKnIsJAkFRkWkqQi\nw0KSVGRYSJKKDAtJUpFhIUkqMiwkSUWGhSSpyLCQJBUZFpKkIsNCklRkWEiSigwLSVKRYSFJKjIs\nJElFhoUkqciwkCQVGRaSpCLDQpJUZFhIkooMC0lSkWEhSSoyLCRJRYaFJKnIsJAkFRkWkqQiw0KS\nVGRYSJKKDAtJUpFhIUkqMiwkSUWGhSSpyLCQJBUZFpKkomVNF6DF4fi8t+kSADglVjRdQmvmRRu0\n4f1Qf9yykCQV
GRaSpKJWh0VEfDIibomI30TE9RFxXNM1SdJi1OqwAN4NPCoztwKeA7wuIg5uuCZJ\nWnRafYA7M9d2dboP+PcmapGkxaztWxZExIcj4i5gLXBKZn636ZokabFpfVhk5muBLYCDgFMi4kkN\nlyRJi06rd0PNyMwE1kTEZ4EjgG/1N+aajv+n6ockqTJdP8o2irDosBz4Vf+DHzCpOiRpAZjiD79E\nXz7rkK3dDRUR20bE4RGxeUQsjYhnA38BfKHp2iRpsWnzlkUCrwHOBAK4DjgyM7/daFWStAi1Niwy\n85e4H0mSWqG1u6EkSe1hWEiSioYKi4h4X0QcOEu/JRFxZET8xWilSZLaYthjFpsDP5il36nAs4Ab\nImJDZn5uyGlIklpi2N1QPwNeHxFXRcTbu/odSXUW058Dzx6lOElSOwwbFlsCuwD/BKyMiBcBRMRy\nYDvgh5m5AbhpLFVKkho17G6oezJzJiCWAKvr7tsAZOZd9fMNo5UnSWqDYbcs8vf/VFsQv62fLu0a\nLoZsX5LUIsNuWWwdEecD/wY8FfhcRGwKHAWsj4idqXZBPWo8ZUqSmjTslsWxwDrgYOAi4AbgNGBT\nYE/gXOCzwKVjqFGS1LCorv495kYj/hjYLTM/NfbG+68h4cSmJi9JG6GTycyehw/G/gvuiPirzPxm\nk0EhSRqvoS8kGBHbAbt2tbEEOAb40Ih1SZJaZKiwiIjXAh/ggWc/QceZUpKkhWHY3VD7A7sByzJz\nycyDKjy+NrbqJEmtMOxuqO9m5o3dHTMzI+KEEWuSJLXMsFsW90XE1rP0WzlsMZKkdhp2y+Ja4IKI\n+AYw3dF9CfAy7r/8hyRpARg2LC6gupjgwT36eYBbkhaYYXdD/Rh4aOfBbQ9wS9LCNfTlPjLztu6O\nWf0c/B2jlSRJapuhwiIz57rm0w5D1iJJaqm+jllExOOoNhzW1s+fMsu4S4D/CnxybBVKkhrX7wHu\nNcA9wI71808BO88yrAe4JWmB6Tcs3gb8ruP5T4H9MvOXnQNFROBlySVpwekrLDLz7K5Ob+wOinq4\njIjzxlGYJKk9hj0b6gVz9HvxkG1Kklpq2LB4VUQ8qLNDRCyNiHcCB45eliSpTYYNi22B9888iYg9\ngG8Cr6K63aokaQEZ9nIfJwDXRMSbgaD6Id7fA88Enjim2iRJLTFUWGTmOwEi4jDgLODPM/OLdbet\nxleeJKkNimFRH5t4+Cy9/xX4CLBlRDyEarfWW4AvjK1CSVLj+tmyeBrwlQHa9Ed5krTA9BMWtwJf\nBlYBGwrDBnDhqEVJktqln7CYBk7PzOv7aTAiTh+pIklS6xTDIjNvBS4ZoM1thy9HktRGw546S0Rs\nB+za1cYS4GjggyPWJUlqkaHCIiJeC3yA6s543TzALUkLzLC/4N4f2A1Y5m1VJWnhG3Y31Hcz88bu\njvVVZ08YsSZJUssMu2VxX0RsPUu/lcMWI0lqp2G3LK4FLoiIb1CdWjtjCfAyYPVoZUmS2mTYsLgA\n2BI4uEc/D3BL0gIz7G6oHwMP7Ty47QFuSVq4hg2LYzPztu6OmZlUlyuXJC0gw16i/NLubvXlyvcA\nrhq1KElSuwz9C26AiNgeWFE//TawFngvcNGIdUmSWmTYX3D/EdWVaB/f1eu3wIdGLUqS1C7Dblmc\nSnUb1YuAI4BzgeXA86i2LiRJC8iwB7h/mpmnZea1AJl5Y2b+JDPfD+w1vvIkSW0wbFis6/j/3yLi\n0I7nm41QjySphYbdDbVNRFwNfAl4H/Dt+pLl66gu9+Hps5K0gAwbFh8AdgDWZua/R8Q7gbOobqt6\n5LiKkyS1Q1S/oxtDQxE7UP2q++qxNDiiiEg4sekyJGkjcjKZGb36jPQ7i06ZeXNE7BMRz83Md4+r\n3VEcn/c2XUIrLI3m346TWxLcbVgmTokV5YHmQRvmRVu05T1ps2EPcPeUmRePu0
1JUvP6/mCPiJdE\nxC0R8eOI+E8d3beIiKdHxNER8TGq31pIkhaQvnZDRcTewMeofqG9DfC5iNgNeCnwP4BNOgY/atxF\nSpKa1e8xi9cBr83McyJiKXA88C7gvwCfBn4F3ANclpmXTKRSSVJj+g2LbTPzHIDMXF+fKnsjcEBm\nfm9i1UmSWqHfYxZ3dD7JzPuAzxgUkrQ49BsWvc6x+3mvASPio8OXI0lqo353Qz0iIh7S8TyAzbq6\nzbT35LFUJklqjX7D4lnAL3t0P6FHt/H8JFyS1Br9hsXtwBmUgyCozpwaWUSsAM4EDgQeAvw/4K2Z\n+eVxtC9J6l+/YXFpZvZ1vYaI2HOEejotA24CnpGZN0XEc4HPRMTjM/PGMU1DktSHfsPivQO0efow\nhXTLzLuBkzuefzEibgD2oTptV5I0T/o6Gyozr+y3wUGGHUR9v4zH4G1bJWnebRQX/YuI5cCngPMy\n87qm65GkxWZslyiflIhYApxPdTmRVYOMe/lJ//L7/3c+YCemDth5vMVJ0kZtun6UtTosIiKAc4Ft\ngUMyc/0g4+9/0tMnUpckLQxT9WPG5bMO2eqwoDp1dnfgoMz8bdPFSNJi1dpjFhGxM/BqYC/g5xFx\nR/04ouHSJGnRae2WRf1bitaGmSQtJn4YS5KKDAtJUpFhIUkqisyFeZHYiEjo63JWkiQATiYzo1cf\ntywkSUWGhSSpyLCQJBUZFpKkIsNCklRkWEiSigwLSVKRYSFJKjIsJElFhoUkqciwkCQVGRaSpCLD\nQpJUZFhIkooMC0lSkWEhSSoyLCRJRYaFJKnIsJAkFRkWkqQiw0KSVGRYSJKKDAtJUpFhIUkqMiwk\nSUWGhSSpyLCQJBUZFpKkIsNCklRkWEiSigwLSVKRYSFJKjIsJElFhoUkqciwkCQVGRaSpCLDQpJU\nZFhIkooMC0lSkWEhSSoyLCRJRYaFJKnIsJAkFRkWkqSiZU0XIM2n4/PepkvglFjRdAnqciInN11C\nK8w1F9yykCQVGRaSpCLDQpJUZFhIkooMC0lSkWEhSSoyLCRJRYaFJKnIsJAkFRkWkqQiw0KSVGRY\nSJKKDAtJUpFhIUkqMiwkSUWGhSSpqLVhERGrIuJfI+KeiPh40/VI0mLW2rAAbgZWAx9rupD5Md10\nAUOYbrqAAU03XcAQppsuYAjTTRcwoOmmCxjYdAPTbG1YZObnM/MLwK+armV+TDddwBCmmy5gQNNN\nFzCE6aYLGMJ00wUMaLrpAgY23cA0WxsWHaLpAiRpsdsYwiKbLkCSFrvIbPdncUScAuyQmS8fcLx2\nvzBJaqHM7Lk3Z9l8FzKEoT70Z3vBkqTBtTYsImIpsJyqxqURsQlwX2aub7YySVp82nzM4gTgbuAt\nwEuAdcBxjVYkSYtU649ZSJKa1+YtC0lSSxgWkqQiw0KSVGRYSJKKDAtJUpFhIUkqanVYRMSKiDg3\nIqYj4jcR8b2IOLij/4ER8aOIuCsiLo2InbrGPy0iflk/Tu3ovm1EXBARN0fEryPi6xHxpDbX3DXM\n/hGxISJWt73eiHhDRFwfEXdGxDUR8eg21xwRe0bEmnq5+GlEHD9qvaPWHBErI+KyuqYberQ9Vfe/\nKyKujYgD21zzpNa/Sc7jjuHGtu7NR81jXf8ys7UPYDPgRGCn+vlzgd8AOwEPA24HDgNWAO8BruwY\n92jgR8Aj6sda4Oi636OANwLbUV3V9lXAL4DN21pzxzDLge8DVwDvaHO9wCuBq4DdO+b7g1te83ep\n7qMSwC7Az4BDG655P+DF9XJ6Q4+2rwTeC2wCvAC4DXhYW2tmQuvfJOfxJNa9eVguxrr+jfxi5/tR\nv/gXAK8Gvt410+8GHlM/vwJ4ZUf/l3fO6B7t3g48oe01A8cCpwIfB1a3tV6qrdafAis3puUCuGdm\n5aqffwZ4S5M1d3Q/qPtDAXhMXfPmHd0up+
tLRptqnqXdiax/46x3Pta9MS4XY1//Wr0bqltEbEe1\ncvwQ2JNqpgKQmXcDP6m7A+zR2R+4uqNfd7t7UyX3T9pcc0TsTPXhNvPNd+zGWO+OwA7A4yPipnpT\n+KSIGHvdY14uvgK8NCKWRcTuwJOBrzZU8+P6aGpP4PrMvKuj21XMsqyPYow1d7c7kfVvnPXOx7pX\nT2dcNY99/dtowiIilgOfAs7LzOuAzak21zr9Btiy/n8Lqm8rnf226NHuVsD5wEmZeUfLaz4DOL7+\nYEjGfK+PMde7Y/33WVQL90rgCOAVLa4Z4E3AC6muRXYNcE5mfqehmh+wvPbQ/Xpmxt2yx7BDG3PN\nne1OZP2bQL0TXfdg7DWPff3bKMIiIpZQLVD3AKvqzncCW3UNujVwxyz9t667dba7KXAxcEVmntbm\nmiPiUGCLzPzszCQY4zecCczjdfXf92TmbzLzRuAs4JC21hwRmwGXAm+n2v//SODgiDim4Zrn0mvc\nbXjgh8zQJlDzTLsTWf/GXe+k1716GuOex2Nf/1ofFvVm07nAtsBhef8lytcCe3UMtzmwa919pv/e\nHU3tRbVpNzP8JsD/Am7KzKM3gpqfCewbEbdExC3AXwJvjIjPt7TeHwP39pjcWL6RTajmPYEtM/OT\nmbkhM28GLmRMATdCzXNZC+wSEZ3fNvfqc9ymap7Y+jeheie27k2w5vGvf5M6SDPGgz0foTrbY/Ou\n7g8Dfk11IOhBVGcKXNHR/2iq3QiPoNp3txZ4dd1vOdU3ms8DSzeSmrcAHl4/tgM+DZwObNPGeuv+\nn6jn8xZUm8XXAi9v8Tx+MNW3uSOovkhtX0/jlIZrjrr7c4Bpqq2eFR39rwT+ez3MzNlQD21rzZNc\n/yZU78TWvQkvF2Nd/8b2Jk3iAewMbKA6A+COjscRdf8D6xlwN9Xug526xj8N+FX9OLWj+/51u3d2\ntfvUttbcYzofZzynzk6sXqr95hdQ7RK5iWqfb2uXi7rfc6hOn70duIVq0/1BTdYMHFCPuwFYX/+9\ntKvty+pxrwWe2fR8nqtmJrT+TXIeT2Ldm4flYqzrn/ezkCQVtf6YhSSpeYaFJKnIsJAkFRkWkqQi\nw0KSVGRYSJKKDAtJUpFhIUkqMiwkSUWGhdSHqG7FelV9S80NEXFxROzSNczqiLg3Iu6OiC9GxCOb\nqlcaNy/3IQ0gIv4BeD6wX/a4z0VEXAIck9X9CKQFw7CQBhARO1Bd2O064EmZuaGj3+FU9z04p6n6\npElxN5Q0gKzucXECsA/336SGiNgGeIFBoYXKLQtpQPVdzb4FPBrYPTNviYgzgA9n5o+6hl0KHEd1\nY5tfAI+nuoXo2rr/jsAbqO5bsCnV3fmOzcxb6v4vrMffherWmIcALwMOz8xvTvilSr+3rOkCpI1N\nZm6IiFdTBcYHIuIDwK3dQVE7F1ifma8AiIi9gK9GxG5Z3c/5PcDjMvM/1v3fAnyZ+g5pmXlhRFwH\nfAd4LrAa+DOqG/FI88awkIaQmd+NiA8Cr6e6e9qzuoeJiL2Bo4Andox3VUTcA/wJ1Y1p/onqxjQz\nLgLeHRHbZ+bP62631X+/ldUtNztvCyvNC8NCGt4ZVGHxj5nZ637HK+u/h0TEMzq6XwGsA8jM8yPi\n4fWWyo4dw6zo0d71Y6hZGophIQ1v5kyo9YX+F2Rmzw/6OiSOA1Zl5tkRsTNw/Czt3T10pdKIPBtK\nmpx/rv/+wW6jiNgsIvatT8M9E1idmRfXvZd3DHfY/JQplRkW0vCi/ttzPcrMa6h3VdVnUM14M9XZ\nT1vVbazr6Pc84LfAZsAeXdNZOp6ypcF56qw0hIj4BNVvLfYAbgXWAO/KzO/1GHYV8BRgmmrX70WZ\n+fW634uBVwKXUe1mugp4NrA7cDbV6bR/Q3WQ/FrgK5n51xN8aVJPhoUkqcjdUJKkIsNCklRkWEiS\nigwLSV
KRYSFJKjIsJElFhoUkqciwkCQVGRaSpCLDQpJU9P8Bv+cRR6qYvXwAAAAASUVORK5CYII=\n",
+ "text": [
+ ""
+ ]
+ }
+ ],
+ "prompt_number": 15
+ },
+ {
+ "cell_type": "heading",
+ "level": 1,
+ "metadata": {},
+ "source": [
+ "QUESTION 2: Which name was the most chosen name for the longest run of consecutive years?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "def count_max_dups_numbers(L):\n",
+ " ans = []\n",
+ " if not L:\n",
+ " return ans\n",
+ " running_count = 1\n",
+ " for i in range(len(L)-1):\n",
+ " if L[i] == L[i+1]:\n",
+ " running_count += 1\n",
+ " else:\n",
+ " ans.append(running_count)\n",
+ " running_count = 1\n",
+ " ans.append(running_count)\n",
+ " # return maximum ocurrence and number of times. Remember that Python starts at 0, which explains the -1 in cumsum\n",
+ " return [L[np.cumsum(ans)[np.argmax(ans)-1]], np.max(ans)]\n",
+ "\n",
+ "\n",
+ "\n",
+ "def count_max_dups_str(L):\n",
+ " ans = []\n",
+ " if not L:\n",
+ " return ans\n",
+ " running_count = 1\n",
+ " for i in range(len(L)-1):\n",
+ " if L[i] in L[i+1]:\n",
+ " running_count += 1\n",
+ " else:\n",
+ " ans.append(running_count)\n",
+ " running_count = 1\n",
+ " ans.append(running_count)\n",
+ " # return maximum ocurrence and number of times\n",
+ " #print np.cumsum(ans)[np.max(ans)-1]\n",
+ " return [L[np.cumsum(ans)[np.argmax(ans)-1]], np.max(ans)]"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 16
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "li = ['aaa','bbb','aaa','abb','abb','bbb','bbb','bbb','aaa','aaa']\n",
+ "\n",
+ "print count_max_dups_str(li)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "['bbb', 3]\n"
+ ]
+ }
+ ],
+ "prompt_number": 17
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "for i in range(np.shape(Names)[1]):\n",
+ " \n",
+ " # Print maximum consecutive duplicates by rank (F1--F5, M1--M5), and also age ranges (inverted, as Years goes from 2017 to 1918)\n",
+ " print count_max_dups_str(list(Names[:,i])), '\\b',\\\n",
+ " Years[np.where(array([label == count_max_dups_str(list(Names[:,i]))[0] for label in Names[:,i]]) == True)[0]][::-1]\n",
+ " print"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "['Mary', 29] \b ['1918' '1919' '1920' '1921' '1922' '1923' '1924' '1925' '1926' '1927'\n",
+ " '1928' '1929' '1930' '1931' '1932' '1933' '1934' '1935' '1936' '1937'\n",
+ " '1938' '1939' '1940' '1941' '1942' '1943' '1944' '1945' '1946' '1953'\n",
+ " '1954' '1955' '1956' '1957' '1958' '1959' '1960' '1961']\n",
+ "\n",
+ "['Barbara', 8] \b ['1937' '1938' '1939' '1940' '1941' '1942' '1943' '1944']\n",
+ "\n",
+ "['Patricia', 7] \b ['1937' '1938' '1939' '1940' '1941' '1942' '1943' '1946' '1947' '1949'\n",
+ " '1950' '1951' '1952']\n",
+ "\n",
+ "['Helen', 6] \b ['1925' '1926' '1927' '1928' '1929' '1930']\n",
+ "\n",
+ "['Ava', 7] \b ['2006' '2008' '2009' '2010' '2011' '2012' '2013' '2014']\n",
+ "\n",
+ "['Michael', 38] \b ['1954' '1955' '1956' '1957' '1958' '1959' '1961' '1962' '1963' '1964'\n",
+ " '1965' '1966' '1967' '1968' '1969' '1970' '1971' '1972' '1973' '1974'\n",
+ " '1975' '1976' '1977' '1978' '1979' '1980' '1981' '1982' '1983' '1984'\n",
+ " '1985' '1986' '1987' '1988' '1989' '1990' '1991' '1992' '1993' '1994'\n",
+ " '1995' '1996' '1997' '1998']\n",
+ "\n",
+ "['Christopher', 16] \b ['1972' '1973' '1979' '1980' '1981' '1982' '1983' '1984' '1985' '1986'\n",
+ " '1987' '1988' '1989' '1990' '1991' '1992' '1993' '1994']\n",
+ "\n",
+ "['John', 24] \b ['1929' '1930' '1931' '1932' '1933' '1934' '1935' '1936' '1937' '1938'\n",
+ " '1939' '1940' '1941' '1942' '1943' '1944' '1945' '1946' '1947' '1948'\n",
+ " '1949' '1950' '1951' '1952' '1961' '1962' '1968']\n",
+ "\n",
+ "['William', 24] \b "
+ ]
+ },
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "['1926' '1927' '1928' '1929' '1930' '1931' '1932' '1933' '1934' '1935'\n",
+ " '1936' '1937' '1938' '1939' '1940' '1941' '1942' '1943' '1944' '1945'\n",
+ " '1946' '1947' '1948' '1949']\n",
+ "\n",
+ "['Richard', 18] \b ['1930' '1931' '1932' '1933' '1934' '1935' '1936' '1937' '1938' '1939'\n",
+ " '1940' '1941' '1942' '1943' '1944' '1945' '1946' '1947']\n",
+ "\n"
+ ]
+ }
+ ],
+ "prompt_number": 18
+ },
+ {
+ "cell_type": "heading",
+ "level": 1,
+ "metadata": {},
+ "source": [
+ "QUESTION 3: How many unique male names were in the top 5 between the years 1980 and 2000?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "wh_1980_2000 = np.arange(np.where(array([label == '2000' for label in Years]) == True)[0][0], \\\n",
+ " np.where(array([label == '1980' for label in Years]) == True)[0][0])"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 19
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "Female_names = Names[np.where(Gender == 1)].reshape( len(Years), len( np.where( array(gndr)==0 )[0] ))\n",
+ "Male_names = Names[np.where(Gender == 0)].reshape( len(Years), len( np.where( array(gndr)==0 )[0] ))"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 20
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "for i in range(np.shape(Male_names)[1]):\n",
+ " \n",
+ " print len(np.unique(Male_names[:,i][wh_1980_2000])), np.unique(Male_names[:,i][wh_1980_2000])"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "2 ['Jacob' 'Michael']\n",
+ "4 ['Christopher' 'Jacob' 'Matthew' 'Michael']\n",
+ "3 ['Christopher' 'Jacob' 'Matthew']\n",
+ "5 ['Christopher' 'David' 'Jacob' 'Jason' 'Joshua']\n",
+ "7 ['Andrew' 'Christopher' 'Daniel' 'David' 'Joshua' 'Nicholas' 'Tyler']\n"
+ ]
+ }
+ ],
+ "prompt_number": 21
+ },
+ {
+ "cell_type": "heading",
+ "level": 1,
+ "metadata": {},
+ "source": [
+ "QUESTION 4: Are there more unique male names or more unique female names that are on top 5?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "print len(np.unique(Female_names))\n",
+ "print len(np.unique(Male_names))"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "43\n",
+ "24\n"
+ ]
+ }
+ ],
+ "prompt_number": 22
+ },
+ {
+ "cell_type": "heading",
+ "level": 1,
+ "metadata": {},
+ "source": [
+ "QUESTION 5: What is the distribution of the numbers of consecutive years that a male name remains the most chosen name?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "def count_dups_numbers(L):\n",
+ " ans = []\n",
+ " if not L:\n",
+ " return ans\n",
+ " running_count = 1\n",
+ " for i in range(len(L)-1):\n",
+ " if L[i] == L[i+1]:\n",
+ " running_count += 1\n",
+ " else:\n",
+ " ans.append(running_count)\n",
+ " running_count = 1\n",
+ " ans.append(running_count)\n",
+      "    # returns the list of run lengths (the maximum occurrence is not computed here). Remember that Python starts at 0, which explains the -1 in the commented-out cumsum lookup in count_dups_str\n",
+ " return ans\n",
+ "\n",
+ "\n",
+ "\n",
+ "def count_dups_str(L):\n",
+ " ans = []\n",
+ " if not L:\n",
+ " return ans\n",
+ " running_count = 1\n",
+ " for i in range(len(L)-1):\n",
+ " if L[i] in L[i+1]:\n",
+ " running_count += 1\n",
+ " else:\n",
+ " ans.append(running_count)\n",
+ " running_count = 1\n",
+ " ans.append(running_count)\n",
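+      "    # returns the list of run lengths; the maximum occurrence is not computed (see the commented-out print below). NOTE(review): `L[i] in L[i+1]` is a substring test for strings, not equality - confirm this is intended\n",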
+ " #print np.cumsum(ans)[np.max(ans)-1]\n",
+ " return ans"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 23
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "li = ['aaa','bbb','aaa','abb','abb','bbb','bbb','bbb','aaa','aaa']\n",
+ "\n",
+ "print count_dups_str(li)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "[1, 1, 1, 2, 3, 2]\n"
+ ]
+ }
+ ],
+ "prompt_number": 24
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "num_con_years_male_M1 = []\n",
+ "\n",
+ "#for i in range(np.shape(Names)[1]/2, np.shape(Names)[1]): # this prints M1--M5, but I just want M1\n",
+ "for i in [5]:\n",
+ " \n",
+ " # Print consecutive duplicates for M1, and also age ranges (inverted, as Years goes from 2017 to 1918)\n",
+ " num_con_years_male_M1 = count_dups_str(list(Names[:,i]))[::-1]\n",
+ " \n",
+ " \n",
+ "num_con_years_male_M1 = np.asarray(num_con_years_male_M1)\n",
+ "\n",
+ "print num_con_years_male_M1"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "[ 6 16 13 1 6 1 38 14 4 1]\n"
+ ]
+ }
+ ],
+ "prompt_number": 25
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "fig, ax = plt.subplots()\n",
+ "\n",
+ "plt.scatter(np.arange(len(num_con_years_male_M1)), num_con_years_male_M1, color='blue', s = 50);\n",
+ "plt.plot(np.arange(len(num_con_years_male_M1)), num_con_years_male_M1, color='blue', lw=1.5);\n",
+ "\n",
+ "ax.set_xlim(-1., 10.);\n",
+ "ax.set_ylim(-3., 42.);\n",
+ "\n",
+ "ax.tick_params(labelbottom=False);\n",
+ "ax.tick_params(axis='y', which='major', pad=10, labelsize=12)\n",
+ "\n",
+ "\n",
+ "ax.set_title(r'$\\rm{Number \\, consecutive \\, years \\, male \\, name \\, top}$' + '\\n', fontsize=18);\n",
+ "\n",
+ "#ax.set_xlabel(r'$\\rm{Year}$', fontsize=18, labelpad=5);\n",
+ "#ax.set_ylabel(r'$\\rm{Number \\, consecutive \\, years \\, male \\, name \\, top}$', fontsize=18, labelpad=10);"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "display_data",
+ "png": "iVBORw0KGgoAAAANSUhEUgAAAXUAAAEXCAYAAABSwdSZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmcW2XZ//HPPdN9aEuhUNm6sBSR8qItVRG6TAZQwZXl\n0SI/wAUQkPUpD/gI2mFTdkTZBXEDZRNRH1yBKS1lb8EyQ6EUWixgoQXsvs1cvz+uxIYwk2UmyTlJ\nvu/Xa14zOUlOrpNzcs2d+1znvoOZISIi1aEu6gBERKR4lNRFRKqIkrqISBVRUhcRqSJK6iIiVURJ\nXUSkiiipi4hUESV1EZEqoqQuIlJFlNRzCCF8KYTw+xDCzBDCh6KORyRqIYSBIYSxIYSmEMJWUccj\n7xebpB5C+FwI4dchhCUhhI4QwtSM+w8OITySvG9xCOGKcsRlZncBNwH7A33K8ZrSuRDCCSGEZSGE\nMVHHUuNGA2cDfwe0L2ImNkndzP5gZkcCfwMeAK4LIWyfdv+fzGwy8EdgrJmdVcbwWsv4WjUvhHBG\nCGFEJ3dtBNYBGrAoQmb2DPD/oo4jTrIcs2UXm6Se4RtAB3BbJ/ctN7N3yxyPlFeCThK3md1mZjua\nmf7JRszMOqKOIWY6PWajEMukbmZLgZOAg0II34o6HimPEMKAEMLJwGeBEHU8IrnE8ZjtFXUAXTGz\ne0IIdwCXhRD+ZmYvZT4mhHAIcCWwO/A1M/t5COEg4Md4v1+jmT0SQmgCLkk+7khgD3zbJwCPAD8D\nTsG/HXwUeMTMftRJWI0hhG3wf4bjgVlmdl1aPPXAucA2wNvAXkCzmbWGEL6cvG9n/L/6IcBXgalm\n9kS29yKE8FXgIODVZIzvmtnVafefkty214EdgOfN7KbkfV1t+3hghpldn7aek5Pr7wA+BBxsZp/I\ntW254gwhfAa4ghz7KRlfAv9wXBFCWAXMM7OrQgjjgR8COwFHmdnsEMJpwJnACKAFOMLM3gkh3AUc\nAcwGDjWzt/OJP+M9Px44B99fTwJfMLOlIYSfAccAjwFfMrPXc607hLAjcDrwHtA/uQ3fNrM3k/dn\nPTay7ZcuYu/R8Z4r3iyvW9B7nCPWzo7PXO9jT7e74PjJcsymrTfb53Mq8B18338DP0dRD4wF/pi+\n/Xkzs1j9ALel/T0YeA14HKjPvD95u39yJx3TybLJacuGJJc9AAxOLhsBtOPJpV9y2cjksu3Snjsy\n+dzT05bVA7OAs9OW/Qy4Ne323sCbwBbJ2+OS62lOPv9Z4HM53o//ST6uV/L20cl1fCR5+8r010wu\n+wnwgzy2fSMwLHl7f+BHGet5qIBtyxVnvvtpRHLZ8E7ei6GdPH7H5HYkMtb7i4znZo2/i/d+KLAG\n/8eQWjYAuCfPdTckb98B/CPt/nOA5zLW0emxkWu/ZIm9J8d7zniTyzP3RcHvcb7HZwHvY0+2u7vx\nZztm8/l87pF8/qlpy7bEc9/3cu3rD7xmoU8o9Q8fTNoHJt/873Z2f9rBdUwnyyZ3sux7abfrksuO\n7WTZ/mnLRpKWoNKWHw2sxj/8Y5OPGZfxmFeBIzPWc0ie78VWwFrgpIwD6BygN/Dh5HszPuN545LL\nd81j2/dL3v488CIwBQjJZV9I/s62bVNzxNmnkP2U9h594AOSZb/+Hvh12u2j8JPp5BH/kTn2wa3A\nw2m3vwGMKWTdyePkkrT7Uh/iD3Wy3YdkrKfL/ZLH8dPd4z1nvJn7oifvcT7HZzfiKmi7e3iMdHrM\nkufnM8vz/xdYDwzNZ3+nfmLb/ZJiZn8PIVwHfDeE8EARVrkobd0dIQTwHZe5rD7PdfUHJuE7BuCQ\nEMLktMfMxhNeulfyjHUi0Bf4T9eTmS0GLgUIIXwK/9r3z4znvZFc/kng5Yx4U+tJbWfqGHgAT9AP\nA6tDCC3ARcn7Elm2bV2uOMvgJ8DdIYStzOwdPKHf
nnZ/tvgz902mHwFzQwh7mn8NH2NmtxaybjP7\nZQhh2xDCCfg3i5TOSmQzj41s+yUfi1J/5Hu8FxhvSk/e42yx9kpbVkhcna0r23YXI/5MhX4+Mz2G\nN972wxsueYl9Uk86G38Dfgn8I+JY0vVP/t6I/+cFbzHmStpr8lx/6oBbl+P+BrwPMGVA8nfvPF8n\ntY5jgOl4q/BA4MEQwkS8FQFdbFsI4dAccXZbCGGSmc3M8bAHgGXAV0MIf8Bbtumyxp+NmT0XQpgF\nnB5C+AWQGUvOdSeT0LnAKWZ2c7L07bwuXjLz2Ohyv5jZs4VsS74KjDel2+9xiePKV9HiTztme/r5\nTP2zKqjSKJbVL5nMbB3+1WtX4NBOHvK+jQ4hDCtHXPjJj7XAo8CDyWVjM2IZEEKY0M31z8b/YWSu\ns3/yApyW5KLhGc/bJfn74QJe64t4//4CM7vFzKYCN+BJ5G/Jx3S1bY/miDMln/2U+ueYqiRIdPKY\n9zGzdrz89Ti8VfubjIfkij+XH+NdOkcDvytk3SGEHfD38UIz+0Py7t5pjzs8x2tn2y9F14N4/578\nXczjvxhx5asn8Xd1zM5I/s7385lZPbMfm/NL3uKY1LcOIQzJXGhmTwE/oPP/bvOBrdNuH4onmW1T\nC0LyuxZp25zvsqRNpCWYEEJfvH/1O2b2bvKr+Y+A00II6c89Cz9bD5t3Wj5dO5iXdl6MtxIHpt31\nLWC1mc0BbsaTWbrjgRvM7B8FbGcAzkxbDt7yfsrM2rJtm5m9lSXO9JZnzv2EfzV9j80HfuoDk23f\ngPd97w5sbWar0u/IFX8n68r0W+AdYKFl1Gfnse5B+Hub/hX+s3hf6QDgI6nNS/7OPDa63C/ZAu7B\n8Z5XvJnPy/P470ms3Yor32U9iZ8ujlnzi7Ryfj7TfCUtvp2AE0nmlxyv/z6pEy+RCyF8Hn8DJ+Jn\nfe80s3MyHtMLmGkZ5VzJ/6Tn4VUyhvdF3YN/EK8H5gHfAybjfW0/w/9LXphc1gbcgl85em5y2Qv4\nG39d8mteM3At3g0UgN2AB8zs7oxYTsH/wy7Cu7d+b2azkmVr04B9kuv+q5n9d57vzXF4i60VT4IP\nmtnDafefgpdC/Rs/afmsJUstQwiJPLb9puR7tRswEP+q2Bd408x+kmvbCogz2366zsx+nHzcZ4FT\n8a62/zOzlhDCFHwfTAaeBy7L6DcnhHAfcIGZze3ifcwafzYhhB8B07v6gGVbdwjhKPyD/TD+T+45\nvL/1w/iHvj9dHBshhK+QY790Eks++zzb8Z4r3pVp628FbrRk6V2h73EesXY3roK3uzvxp23HB47Z\ntPu6/Hwm7x+Jn0v5Ft7A2YgfC7/JzC/5iE1SF4mTEEIfYBvzOvR+eEL/36jjkuqTltRHmtlrPV1f\nHLtfROLgYjb3iZ6ItwRFSiFbt2LBlNRFOncXMCuEcB4w38xezfUEkUIlu2XvxLsj7wwhHNnjdar7\nRUSkeqilLiJSRZTURUSqiJK6iEgVUVIXEakiSuoiIlVESV1EpIooqYuIVBEldRGRKqKkLiJSRZTU\nRUSqiJK6iEgVUVIXEakiSuoiIlVESV1EpIooqYuIVBEldRGRKqKkLiJSRZTURUSqiJK6iEgVUVIX\nEakiSuoiIlVESV1EpIr0ivLFQwgW5euLiFQqMwudLY+8pW5mZf+ZPn16JK8b5Y+2uTZ+tM218ZNN\n5EldRESKR0ldRKSK1GRSb2xsjDqEstM21wZts4Rc/TP/eWAIuwHzgLvN7OjksgOA64CdgCeAr5rZ\na3m/eAiW7+uLiIgLIWBFOFF6HfAkYMmVDgXuBc4FhgBPA3f2LFQREemJvJJ6CGEq8C7wIJD673AY\n8LyZ3WtmG4BmYO8QwuhSBCoiIrnlTOohhEHA+cCZbE7oAHsCz6VumNka4GVgTJFjFBGRPOXTUr8Q\nuMXM3sC7XlKd
4A3AiozHrgC2KF54IiJSiKxXlIYQxgIHAONSi9jcWl8FDMp4ymBgZSEBNDc3/+fv\nxsZGnckWEcnQ0tJCS0tLXo/NWv0SQjgduJjNiXoLoB54AbgRONbMJiYf2wC8DYw1s5fyenFVv4iI\nFCxb9UuupN4fGJi6CZwFjAROTN5+Gfg68ABwATDRzPYrIDAldRGRAmVL6lm7X8xsLbA2bUWrgLVm\ntjx5+3DgWuBXwOPA1GIFLSIihcv74qOSvLha6iJF097uv+vro41DSq9YFx+JSAzNmQOJBPTp4z+J\nBMydG3VUEhW11EUq2Jw5MHkyrF79/uUNDfDIIzB+fDRxSWmppS5SpaZN+2BCB1921lnlj0eip5a6\nSIVqb/fulo6Ozu+vq4MNG9THXo3UUhcRqRFK6iIVqr7e+9O7MmWKWum1SN0vIhVs7lyYNKnzE6Uz\nZ8K4cZ0/Tyqbul9EqtS4cV7lMmHC5mXDhimh1zIldZEKN348nHmm/73XXt7lMnZstDFJdJTURapA\nayv06gUnnABvvAELFkQdkURFSV2kCrS1wW67wac+5bcfeijaeCQ6SuoiVaC1FfbcE3bdFXbYAR5+\nOOqIJCpK6iIVbt06WLgQPvIRCAGamjypq7CsNimpi1S4F1/0q0o/8hG/nUjA2297611qj5K6SIVr\na/Pfe+7pv5ua/Le6YGqTkrpIhWtt9TLG3Xbz2yNGwKhROllaq5TURSpcqvKlb9/Ny5qaYMaMzRNn\nSO1QUhepcG1tm/vTUxIJePddeO65aGKS6Cipi1Sw9evh5Zc396enJBL+W/3qtUdJXaSCvfSSd7Fk\nttS33x5231396rVISV2kgqXKFjNb6uCt9UcegY0byxuTREtJXaSCtbV55cvo0R+8r6kJVq2CZ54p\nf1wSHSV1kQrW2upDA6RXvqQ0Nvpv9avXFiV1kQrWWeVLyjbb+FC86levLUrqIhVq/XofYrerpA7e\nr/7oo/5YqQ1K6iIVasECr3zp7CRpSlMTrF0LTzxRvrgkWkrqIhUqVfmSraU+ebKP3Kh+9dqhpC5S\nodraoK7O69G7MmSIT3enpF47lNRFKlRrK+yyC/Trl/1xiQQ89ph3w0j1U1IXqVBtbdn701MSCdiw\nAWbPLn1MEj0ldZEKtGFD7sqXlEmT/AIllTbWBiV1kQq0YAFs2pRfS33gQPjoR9WvXiuU1EUqUGq2\no3xa6uCljU8+CStXli4miQcldZEK1Nqau/IlXSLhNe2zZpU2LomekrpIBWprg513hv7983v8fvtB\nnz7qV68FSuoiFai1Nf+uF4ABA2DffdWvXguU1EUqzMaNPjlGPidJ0zU1wZw5Ps2dVC8ldZEKk6p8\nKaSlDt6vbuYTZ0j1UlIXqTCpypdCW+of/7j3wasLprrlTOohhF+FEN4MIawIIbwSQjg37b4DQgjz\nQwirQwgPhRCGlzZcEWlt9UG68q18SenbF/bfXydLq10+LfUfAKPMbBBwMHBqCOFTIYShwG+Bc4Eh\nwNPAnSWLVESAzZUvAwYU/txEAubNg7ffLn5cEg85k7qZtZrZurRFG4G3gcOAeWZ2r5ltAJqBvUMI\nncyWKCLFUmjlS7qmJv/d0lK0cCRm8upTDyFcH0JYDbQCF5vZHGBP4LnUY8xsDfAyMKYUgYpI9ytf\nUvbZB7bYQv3q1SyvpG5mJwNbAAcCF4UQPgY0ACsyHroi+TgRKYGFCz2xd7el3ru3T5yhfvXq1Svf\nB5qZAS0hhLuBI4FVwKCMhw0GChpdorm5+T9/NzY20piaAl1EPiA121F3W+rg/eoPPABvvAHbb1+c\nuKS0WlpaaMmzzyx4rs5fCOEW4F/AYuBYM5uYXN6A97WPNbOX8lyXFfr6IrXswgth+nRYtap7J0rB\nL0DaZx/41a/gqKOKG5+URwgBMwud3Ze1+yWEsE0IYWoIoSGEUB9C+BTwX8D9wH
3AmBDCYSGEfsB0\n4Nl8E7qIFK61FUaO7H5CB9h7b9hyS/WrV6tcfeoGnAgsAZYDFwJHm9lTZrYMOBy4GHgHmABMLWGs\nIjUv39mOsqmvh8ZG9atXq6x96snE3Zjl/geBPYock4h0YtMmePFFOPjgnq8rkYDf/Q4WLfKWv1QP\nDRMgUiEWLvRp7HraUofN9erqgqk+SuoiFSJV+dLdcsZ0e+4J22yjpF6NlNRFKkRqIK89itDhGYJ3\nwTz8sI/cKNVDSV2kQqQqXxoairO+RAKWLIGXXy7O+iQelNRFKkQxKl/SJRL+W10w1UVJXaQCpCpf\nitGfnjJ6tF9RqtLG6qKkLlIBXnkF1q8vbktd/erVSUldpAKkTpIWs6UOXtr41lub1y+VT0ldpAKk\nyhmLUfmSTv3q1UdJXaQCtLXBiBE+FnoxjRrlFTXqV68eSuoiFaAnsx3lkkj4TEgdHaVZv5SXkrpI\nzLW3w/z5xT1Jmq6pCd59F557LvdjJf6U1EViLlX5UsqWOqhfvVooqYvEXKoypVQt9R128Jp1JfXq\noKQuEnOlqnxJl0jAjBl+kZNUNiV1kZhra4Phw2HgwNK9RiIBK1f6VHdS2ZTURWKulJUvKan53lXa\nWPmU1EVirNSVLynDhvlrqF+98impi8TYokWwbl3pW+rgpY2zZvnsSlK5lNRFYqyYsx3lkkjAmjXw\n5JOlfy0pHSV1kRgr1UBenZkyxUduVL96ZVNSF4mx1lbYcUcYNKj0r7XVVjB2rPrVK52SukiMFXu2\no1yammD2bFi7tnyvKcWlpC4SUx0d8MIL5el6SUkk/ETpY4+V7zWluJTURWJq0SJvMZezpT5pEtTX\nq1+9kimpi8RUOStfUgYNggkT1K9eyZTURWKqnJUv6ZqavKxx1aryvq4Uh5K6SEy1tvoIioMHl/d1\nEwkf2GvWrPK+rhSHkrpITJW78iVl//2hd291wVQqJXWRGIqi8iVlwADYd1+dLK1USuoiMbR4sV+y\nH0VSB++CmTMH3nsvmteX7lNSF4mhUs92lEtTk39beOSRaF5fuk9JXSSGoihnTLfvvtCvn/rVK5GS\nukgMtbXB9tvDlltG8/p9+/oJU/WrVx4ldZEYKsdsR7kkEvCPf8CyZdHGIYVRUheJmVTlS1T96SlN\nTf67pSXSMKRASuoiMfPaa7B6dfQt9QkToKFB/eqVRkldJGairnxJ6d0bJk9Wv3qlUVIXiZmoK1/S\nJRI+8fWbb0YdieRLSV0kZtraYLvtYMiQqCPxpA7qV68kOZN6CKFPCOHWEMKiEMKKEMLcEMKn0+4/\nIIQwP4SwOoTwUAhheGlDFqlucah8SRk3zgcUUxdM5cinpd4LeA2YbGaDgPOAu0IIw0MIQ4HfAucC\nQ4CngTtLFaxItTPzlnpcknp9vU9IrZOllSNnUjezNWZ2vpm9lrz9f8CrwATgMGCemd1rZhuAZmDv\nEMLoEsYsUrX++U+vfIn6JGm6piZYuNCrciT+Cu5TDyEMA0YDzwN7As+l7jOzNcDLwJhiBShSS+J0\nkjQl1a+u1nplKCiphxB6A7cDPzOzl4AGYEXGw1YAWxQnPJHaEtVsR9mMGQNDh6pfvVL0yveBIYQ6\n4JfAOuCU5OJVwKCMhw4GVua73ubm5v/83djYSGNjY75PFak6ra0wbBhsvXXUkWxWVweNjd5SN4MQ\noo6o9rS0tNCSZwlSMLPcDwohAD8FhgOHmNn65PLjgWPNbGLydgPwNjA22ZLPtV7L5/VFasW++/pV\nnA8+GHUk73fDDXDyybBgAey6a9TRSAgBM+v032u+3S83AB8GPp9K6En3AWNCCIeFEPoB04Fn80no\nIvJ+cat8Sad+9cqRT536COAEYG/gXyGElcmfI81sGXA4cDHwDl4RM7WUAYtUqyVLYOXKeFW+pOy+\nu18QpX71+MvZp25mi8mS/M3sQWCPYgYlUo
viWPmSEoK31h98UP3qcadhAkRiIi4DeXWlqQmWLvWx\nYCS+lNRFYqK1FbbdNl6VL+lS/erqgok3JXWRmIjrSdKUUaNg+HCdLI07JXWRGEhVvsS16wW8H72p\nyZN6R0fU0UhXlNRFYuD112HFini31MG7YN55B+bNizoS6YqSukgMxP0kaYr61eNPSV0kBuJczphu\np538ilL1q8eXkrpIDLS1wTbb+E/cNTXBjBmwaVPUkUhnlNRFYiBOsx3lkkh4///cuVFHIp1RUheJ\nWCVUvqRTv3q8KamXUXu7/4ike+MN+Pe/K6elPmyYx6p+9XhSUi+DOXO8ddOnj/8kEvrqKptVSuVL\nuqYmmDkTNmyIOhLJpKReYnPmwOTJ0NLiF2x0dPjfkyb5fSKVUvmSLpGANWvgqaeijkQyKamX2LRp\nPpFwptWr4ayzyh+PxE9bm4/3UgmVLylTpvgVpuqCiZ+8Zj4q2YtX+cxH7e3e3dLVJdV1df71tb6+\nvHFJvOy/P/Tq5WWClWTcOBgyRCdMo1CMmY+kBDo6YNmyqKOQKMV5tqNcEgmYPRvWrYs6EkmnpF5C\n9fXen57NLrvA2Wf7ONVSe/71L3jvvco6SZrS1ATr18Njj0UdiaRTUi+xK67wbpZMDQ1wzz3wxS/C\nlVf6sKZnnOEDO0ntqMSTpCmTJvmxrX71eFFSL7FnnvFulj328A9AXZ1/bZ05Ew4/HH71K59J5stf\nhmuvhZ139lnbX3st6silHCqxnDFl8GCYMEF96nGjpF5Cb70F55wDjY3eItuwwX8eeshPMqXsthvc\ndhssWABf/SrccosPmnT88fDKK1FFL+XQ2gpbbeUzHlWiRAKeeMKHDdCFdfGgpF5C//M/Xrp4/fVe\n/lVfn73SZdQouOkmWLgQTjgBfvlLGD0ajj0WXnyxfHFL+aSGB6jUiZx32skH9tpyS11YFxdK6iUy\nYwb84hee2PfYo7Dn7rSTd8W8+iqcdhrcfbev48gjN/fBSuUzq6yBvDLNmeMn+cG3RRfWxYOSegls\n2AAnnQQjR8K553Z/PdttB1ddBYsW+Yfnj3+EMWPgiCPg2WeLFa1EZelSePfdyuxPB7+wbs2aDy7X\nhXXRUlIvgauughde8Nb2gAE9X9+228Ill3hy/+534e9/9z75z39el2lXskqufGlvh0ce6fr+GTPU\nxx4VJfUiW7QILrgADj0UPvOZ4q5766193anXmDULPvYx+PSn4dFHu36eRoeMp1TlSyUmdYkvJfUi\nO+00L1u85prSvcaWW3qLffFib8HPmQMTJ/rFIC0t3r8JGh0y7lpb/TL7D30o6kgKl+vCuilTNPxF\nVDT2SxHdf79fTHT55eXtU1y9Gm6+GS67zK9QnDgRpk71fvjMPs+GBv/aPH58+eKTzk2e7CcXZ82K\nOpLumTvXT4pmDljX0ODXYaSX7UpxZRv7RUm9SFav9q/RgwZ5C7l37/LHsHYt3HorXHopLFnS9eMS\nCV0wEjUzGDrUT3rfdFPU0XTfnDnegJkxY/PAdTfeCN/8ZrRxVTsl9TI45xxvKc+a5aPuRWnNGthi\ni83dMJk0OmT0li71bpcf/hBOPz3qaHquvd0bFXvv7Q2a556Dvn2jjqp6aZTGEnv+ea94+frXo0/o\n4B+mSr2YpVZU8vAAnamv94bEtdf6hXJXXBF1RLVLSb2HOjq8Jn3QIO/2iAOdxIq/Si5nzObgg31M\no4su0hAXUVFS76Gf/9y7XC6/3PtI4+Kqq/yEVaZ+/XxUSIlWW5tXMW23XdSRFN8Pf+iTfpx6atdd\ngFI6Suo9sHy5DwOw//4+EFecjBvnVS6JhPehh+BljWPGqCohDlLDA1RjN9mOO8L558MDD8B990Ud\nTe1RUu+Bb3/bJzi44YbOx0yP2vjxXuWyYQNs3OjfJp5+WuNfx0FqIK9qddppftL09NNh5cqoo6kt\nMUxFlW
H2bB8i98wzYa+9oo4mu9TokCecADvs4Bcu6WtxdN56y6cxrLb+9HS9enljZ8kSaG6OOpra\noqTeDZs2+cnRnXaC6dOjjiZ//fr5AGOPPgp//WvU0dSuWhke4BOf8DkBrrkG/vGPqKOpHUrq3fCj\nH/lBes01XsZVSb7xDRgxQq31KKUqX6q5+yXlkkt8KIQTT9x8cZKUlpJ6gZYs8db5Zz7jQwJUmj59\nPKE/9ZQP5Svl19bmJbDbbx91JKW31VZ+Luexx+CnP406mtqgK0oLdMQRfla/tdVnKqpEGzf6pBsD\nB/ocqnE8yVvNGhv95PXs2VFHUh5mfm1Ea6tfmBSn0t9K1aMrSkMIp4QQng4hrAsh3JZx3wEhhPkh\nhNUhhIdCCMOLFXQc/elPcO+9cN55lZvQwS/jnj7dJ9pQyVn5tbVVf396uhD8pOmKFZtnSpLSydlS\nDyEcCnQAnwL6m9nXksuHAi8D3wD+AFwETDKzT+T94hXUUl+71vtA+/b1cS369Ik6op5pb/ea9fp6\n3x5dYVoeb7/tk55cdZVXTtWS1PhIM2f6SKLSfT1qqZvZfWZ2P7A8467DgOfN7F4z2wA0A3uHEEb3\nNOA4uvhinzP0+usrP6GDJ/HmZv9KfNddUUdTO2ql8qUz3/seDB/ulWMbN0YdTfUqpDc187/CnsBz\nqRtmtgZvuY8pQlyxMn++tzCOPtqv0KwW//VfXmPf3OxlmlJ61TaQVyEaGrxy7PnnfSgBKY1Cknpm\nP0kDsCJj2Qqgwor8sjODk0/2A7LaRp6rq/PLuV96Ce64I+poakNrq1e+7LBD1JFE4wtfgM99zhsS\nr70WdTTVqVcBj81sqa8CBmUsGwwUdFFwc9rlZo2NjTQ2Nhby9JK74w6/rP6GG7wvtNp88Ys+Fsz5\n58ORR0YzuUctSZ0krcYxX/L14x/7e3D66TpRn6+WlhZaWlryemzeJY0hhAuBHdNOlB4PHGtmE5O3\nG4C3gbFm9lKe64z1idL33oPdd4eRI73OtlpL//74R289/eQncNxxUUdT3YYNg89+1meoqmWXXupj\nJ/3+937sSWF6WtJYH0Loh7fq60MIfUMI9cB9wJgQwmHJ+6cDz+ab0CvBuef6GB033li9CR38QqqP\nfxwuvBDWr486muq1bJmP+1KLJ0kznXmmvw+nnvrBOU6lZ/JJVd8F1gDnAP8PWAuca2bLgMOBi4F3\ngAnA1BLFWXZPPeVdLqecUv1D1YYAF1zgfZy13oIspVo+SZqpTx//fC1e7BNqSPHoitJOtLd7y/WN\nN7zyZVCBIvxrAAAMKElEQVTmmYMqZOazJb3yCrz8MvTvH3VE1efGG72cb/FiL+0Tn4fg9tv9Wgl9\ng8mf5igt0A03+OXzV19dGwkdvLV+4YX+j6ySZ7ePs9ZWHwBup52ijiQ+Lr/ch6s4+WQNMFcsaqln\nePNN+PCHvaX+l7/UXpXCAQd4HfErr3Q+HZ503wEHwKpV8MQTUUcSLzffDN/8pk8NecwxUUdTGdRS\nL8C0aX6y8Lrrai+hg7fW33rLt1+Kq7VV/emdOe442HdfOOsseOedqKOpfErqaf72N/j1r73Uarfd\noo4mGvvtB5/+tF9Bq2nIimf5cli6VP3Gnamr8y7P5cvhO9+JOprKp6SetG4dfOtbsOuuntRr2QUX\n+AfsmmuijqR6vPCC/1ZLvXNjx/q8pjffrO6pnlJST7rsMliwwLsd+vWLOppoffSj8PnPw5VX+gVY\n0nOp2Y7UUu/aBRf4xCEnnqixiHpCSR0v4fv+9+HLX4ZPfjLqaOLh/PM9oV99ddSRVIe2Nq98USlj\n1wYO9IG+nn1W53R6ouarX8zg4IN9Fpr582tjirF8HXGET1D96quw9dZRR1PZDjzQJ4l48smoI4k3\nMzjkEJ8c/YUXanfgs1xU/ZLFPfd46eJFFymhZzr/fC/Bq7bRKaNQa7Md
dVcIcO21Pt56rU0iUiw1\nndRXrIAzzvBhAE4+Oepo4mfPPWHqVB8D+623oo6mcr37rl//oJOk+dllF6+Cuftub3BJYWouqbe3\n+w/4PJ1vvumXb/cqZBDiGjJ9ulcGXXpp1JEUJn0/R62WZzvqrrPPhtGjvSJt7dqoo6ksNZPU58zx\nWYv69PGfCRO8BfrNb8LHPhZ1dPG1++4+49P11/sQAnGXuZ8TCZg7N9qYUpUvaqnnr29fP+YWLoRL\nLok6mspSEydK58zxwao6G+Lz4YchZvNyxM4rr3hyP/FEn+Agrrrazw0N8MgjMH58NHGdcYaPVb9y\nZXUP4VwKX/kK3HsvzJvnLXdxNX+idNq0rsdsvuCC8sZSiXbeGb72Nb8wJM5TkHW1n1ev9kvQo9La\n6l0vSuiFu+oqv27kW9/SgF/5qvrDrL3dW2ldmTEjPn2vcXbeef774oujjaMrcd7Pqnzpvg99yI+5\nv/8d7rwz6mgqQ9UndSmO4cPh+OPhpz/17hjJz3vv+bkI9ad330knwT77eInjv/8ddTTxV/VJvb7e\n+1m7MmWKP0Zy+853/L268MKoI/mg+nqYOLHr+/fZJ5r9rMqXnquv9wq1pUvhu9+NOpr4q/qkDt4v\n19nY4A0NPr6J5Gf77b3V9Itf+Dg5cbJ0qV930JkQfIz43/62vDGBprArlgkT/FqS667zE+LStZpI\n6uPGeX9rIuEnq+rq/O+ZM6t//tFi+/a3/cTV+edHHclmzzzjH/oXX4Qf/OCD+/nPf4a99oLDD4fm\nZujoKF9sra0+NeCIEeV7zWp10UWwzTZehaXzYFmYWWQ//vLltWmT/0j3nX22WQhmra1RR2J2xx1m\n/fqZ7bST2Zw5m5dn7ue1a82OPdYMzA491GzlyvLE98lPmo0fX57XqgW33+778Lrroo4kWsnc2Wle\nrYk6dSmuZctg1CgfCO2uu6KJob0dzj3Xr3SdONFrmbfdNvtzzHyM+GnTvI/7/vu9XLOUdtwRmpq8\ny0p6zgwOOgieftoH4NtmG19ea+fFar5OXYpr6FC/oObuu30W+HL79799vPdLL4UTToAHH8yd0MH7\n1s84w7tjXn/dx41/6KHSxvn66zpJWkwheL/6mjWw997xunI4LpTUpVv++79h8GAfG6acXnrJJwX/\n61/9MvKbbvIPdSEOOsiHwB02zMfPv/ba0lzYopOkpZG6wOytt/z8SEcHtLTApEk6iQpK6tJNQ4Z4\nN8b99/tX4XL48599nJ7ly/1ilJNO6v66dt0VHn/cx+4+9VSvwV+/vnixgsoZS2XaNB+aN1PUVw7H\nhZK6dNvpp8NWW8H3vlfa1zGDyy+Hz3wGRo6Ep57y6wt6atAg+N3vvG/+1lu97/tf/+r5elNSlS8j\nRxZvnbUuzlcOx4WSunTboEE+ROqf/gSPPVaa11i71keJPPtsL0l89NHiJsm6Oi+Vu/NOn0btox8t\n3jePtjb48Idr7yRelFR3oaQuPXTKKV6BUIor/ZYs8auBb7/dr2K9887OLyIrhi99yf9h1NV53+wd\nd/R8na2t6k8vtlxXiNfV+ZXPtTypi5K69EhDg1+Q9OCD/tW3WGbP9guK5s/3LpLzzvPKh1IaO9Zb\n6R/7GBx1FJxzTve/yq9Y4f+U1J9efF1dId6/v58Ev/xy/zZ35pmVMQdA0XVVwF6OHyK4+EiKb80a\ns+22M5s82ayjo+fru/VWsz59zHbZxez553u+vkKtX2924ol+kcvBB5u9+27h63j8cX/+/fcXPz4x\ne+YZs0TCrK7OfxKJzRefzZ9vdswxZvX1Zn37mp18stnixdHGW2xkufhISV2K4sc/9qPpb3/r/jo2\nbjQ77TRfz4EHmi1fXrz4uuOGG8x69TLbfXdPFIW49VbfjgULShObuGxXiC9caHbccWa9e/vP8cf7\nsmqgpC4lt26dX6q/777da60vW2bW
1ORH5BlneIKPgxkzzIYONRs82OyBB/J/3rRpPnyBhqSI3uLF\n3lrv08db78cea/bii1FH1TPZkrr61KUo+vb1fu/HH/dqmEI8/7z3Y8+aBbfdBldfHZ+JwCdP9n72\nUaO8pPKyy/KrsGhtVeVLXAwf7lehvvKKn9i/807YYw+fKi81f2w10dgvUjQbN/pcpltt5bXk+ZzY\nvO8+L1kcOND/3nff0sfZHatXw9e/7mPdfOUrcMstfmKuKyNG+Jg0t99evhglP0uX+pDb11/vww0c\nfrg3SPbeO+rI8qexX6Qsevf2C5GeecavNM2mo8Pnhz3sMC/7e/rp+CZ08GqL3/zGp1b79a+97HHJ\nks4fu3Klz+WqcsZ4GjbMv3EtWuTlj3/9q1c+feEL5bs6uqS66pcpxw/qU686GzeajR5tttdeZu3t\nnT9m5Uqzww/3/vOjj/ZhcSvJ739vNnCg2bBhZo8++sH7n3jCt+2++8ofmxTunXfMmpvNttxyc8XT\n7NlRR5Ud6lOXcunVywf5mjcP7rnH67zTa71ffRX239+7Wq68En7+c590o5J87nN+7mCLLaCx0YcY\nSDdvnv9WS70yDBnix+zixfD973vX4X77wYEHdn3tReZxHStdZfty/KCWelXatMls553NBgzwyTTq\n6swaG81uusls6629RfTnP0cdZc8tX2520EHeujv1VK9Nb2z022A2Zcr7J+6QyrBqldkVV/g3MTCb\nNMlLdTs6vD6+sXFzfXxjYzT7GE2SIeU0Z463dDob9XDUKPjLX2C33cofVyls2uRXnl51lV+injlV\nXkODD0A1fnw08Un3rV3rJ8QvvdTHxd9rLx/6OfO4jmIfZztRqqQuRZdI+PjWnZk0Kfsoe5Vqjz18\nSIPOJBKlnYxDSmv9ei+1PeOMrodnLvc+VlKXsmlv90kruprcua4ONmyorvrtWtzmWtPe7tVdXaWr\ncu/jkpY0hhC2CiHcF0JYFUJYFEI4sqfrFBGJm1IPKFcsxah+uQ5YB2wLHAXcEELQ2HQ1KtfQqFOm\nVF+LtRa3udZU0j7uUfdLCKEBeAfY08xeTi77OfCGmf1vHs9X90sVmjvX+85Tc0mmNDTAzJkwblw0\ncZVSLW5zrYnTPi5l98toYFMqoSc9B6hCt4aNG+cnQxMJ72usq/O/qzm51eI215pK2cc9balPAu4y\ns+3Slh0PfMXMEnk8Xy31Kpe6QCMuX03LoRa3udZEvY+ztdR7OhbeKmBQxrLBwMp8V9Dc3Pyfvxsb\nG2lsbOxhSBIntZjYanGba02593FLSwstXdUJZyhFn/ovgX+a2XfyeL5a6iIiBSppnXoI4deAAccB\n44E/Ap8wsxfyeK6SuohIgUo99O7JQH/gLeBXwIn5JHQRESk+XVEqIlJhNEmGiEiNUFIXEakiSuoi\nIlVESV1EpIooqYuIVBEldRGRKlKTST3fy22riba5NmibRUm9Rmiba4O2WWoyqYuIVCsldRGRKhL5\nMAGRvbiISAUr2SiNIiISH+p+ERGpIkrqIiJVREldRKSKKKmLiFQRJXURkSry/wG5e+lTvNCXPgAA\nAABJRU5ErkJggg==\n",
+ "text": [
+ ""
+ ]
+ }
+ ],
+ "prompt_number": 26
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 26
+ }
+ ],
+ "metadata": {}
+ }
+ ]
+}
\ No newline at end of file
diff --git a/task-05/test b/task-05/test
new file mode 100644
index 0000000..e69de29
diff --git a/task-09/get_top_names.py b/task-09/get_top_names.py
index cccd0ad..b12f829 100644
--- a/task-09/get_top_names.py
+++ b/task-09/get_top_names.py
@@ -5,6 +5,7 @@
"""
import pandas as pd
+import sqlite3
def extract_data_lines(filename, start_text, end_text, include_start=False,
include_end=False):
@@ -31,17 +32,56 @@ class NameRecorder:
def __init__(self):
self.records = []
self.year = None
+
def add(self, name, is_female, rank):
+
+
if self.year is None:
+
raise ValueError('One must set year first')
+
+
+ if is_female:
+
+ self.gender = 'Female'
+
+ else:
+
+ self.gender = 'Male'
- # complete this member function
- raise NotImplementedError
+
+ self.records.append((self.year, self.gender, rank, name))
+
def to_pandas(self):
- # complete this member function
- raise NotImplementedError
+
+ if self.records == []:
+
+ raise ValueError('Empty data base')
+
+ return pd.DataFrame.from_records(self.records, columns=['year', 'gender', 'rank', 'name'])
+
+
+    def to_sql(self, filename = None):
+        """Write the records to table `filename` of the SQLite file "<filename>.db".
+
+        Replaces an existing table of that name. Raises ValueError if `filename`
+        is None, or (from to_pandas) if no records were added. To check the output:
+            recorder.to_sql("names"); conn = sqlite3.connect("names.db")
+            pd.read_sql_query("select * from names;", conn)
+        See https://www.dataquest.io/blog/python-pandas-databases/
+        """
+        if filename is None:
+            raise ValueError('Please, insert file name')
+        # Use self, not a module-level `recorder`, so any instance can be exported.
+        df = self.to_pandas()
+        conn = sqlite3.connect("%s.db" % (filename))
+        try:
+            return df.to_sql(filename, conn, if_exists="replace")
+        finally:
+            conn.close()  # release the database file handle even if the write fails
+
def clear(self):
self.records.clear()
@@ -66,4 +106,4 @@ def clear(self):
data = recorder.to_pandas()
- print(data.query('name == "Emma"').query('rank == 1')['year'].tolist())
+ print(data.query('name == "Emma"').query('rank == 1')['year'].tolist())
\ No newline at end of file
diff --git a/task-10/GitHub_API.ipynb b/task-10/GitHub_API.ipynb
new file mode 100755
index 0000000..5fd5aa1
--- /dev/null
+++ b/task-10/GitHub_API.ipynb
@@ -0,0 +1,230 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[u'Hack-Hour', u'AoT', u'learning-by-doing']\n"
+ ]
+ }
+ ],
+ "source": [
+ "import json\n",
+ "import requests\n",
+ "\n",
+ "repos = json.loads(requests.get('https://api.github.com/orgs/astropgh/repos').text)\n",
+ "print([repo['name'] for repo in repos])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[u'2017-11-14T21:47:05Z', u'2017-11-14T22:12:02Z', u'2018-09-27T20:56:58Z']\n"
+ ]
+ }
+ ],
+ "source": [
+ "print([repo['created_at'] for repo in repos])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "hector-mr Task 09\n",
+ "kuanweih Task 09\n",
+ "hector-mr Task 08\n",
+ "troyraen Task/07\n",
+ "kuanweih Task/07\n",
+ "bretthandrews Completes Task/07\n",
+ "djperrefort Completes task-03\n",
+ "djperrefort Adds djperrefort to task-02/completed.md\n",
+ "troyraen Task/03\n",
+ "bretthandrews Completes Task/03\n",
+ "hector-mr Task 07\n",
+ "hsnee Task/07\n",
+ "hector-mr Data structure for task-04\n",
+ "KuanWang-Astro task03 completed\n",
+ "AlanPearl Completed task 1 by adding my username to completed.md\n",
+ "kuanweih completed Task/03\n",
+ "davidjsetton add my username to complete task 01\n",
+ "cfielder Task/02\n",
+ "troyraen Troy name in task-01/completed.md\n",
+ "kevindwilk added my username to task-02/completed.md\n",
+ "kevindwilk added my name Kevin Wilk\n",
+ "lizehan2008 I added my username\n",
+ "lizehan2008 I added my username \"lizehan2008\"\n",
+ "hector-mr Task 03\n",
+ "bretthandrews Completes Task/02\n",
+ "hsnee completed Task/03\n",
+ "hsnee finishing task 02\n",
+ "hsnee task-01 completed\n",
+ "KuanWang-Astro task/02 completed\n",
+ "KuanWang-Astro Task/01 completed, task/05 completed\n"
+ ]
+ }
+ ],
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "pulls_learndo = json.loads(requests.get('https://api.github.com/repos/astropgh/learning-by-doing/pulls').text)\n",
+ "\n",
+ "\n",
+ "\n",
+ "for p, pp in enumerate(pulls_learndo):\n",
+ " \n",
+ " print pp['user']['login'], pp['title']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "This is the user with the highest number of pull requests: hector-mr\n"
+ ]
+ }
+ ],
+ "source": [
+ "pulls_learndo_users = []\n",
+ "\n",
+ "for p, pp in enumerate(pulls_learndo):\n",
+ " \n",
+ " pulls_learndo_users.append(p)\n",
+ " pulls_learndo_users[p] = pp['user']['login']\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "#word_counter = {}\n",
+ "\n",
+ "#for word in pulls_learndo_users:\n",
+ " \n",
+ "# if word in word_counter:\n",
+ " \n",
+ "# word_counter[word] += 1\n",
+ " \n",
+ "# else:\n",
+ " \n",
+ "# word_counter[word] = 1\n",
+ " \n",
+ " \n",
+ "#popular_words = sorted(word_counter, key = word_counter.get, reverse = True)\n",
+ "\n",
+ "#print \"This is the user with the highest number of pull requests: \", popular_words[0]\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "def most_common(lst):\n",
+ " return max(set(lst), key=lst.count)\n",
+ "\n",
+ "print \"This is the user with the highest number of pull requests: \", most_common(pulls_learndo_users)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "numpy_branches = json.loads(requests.get('https://api.github.com/repos/numpy/numpy/branches').text) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "maintenance/1.0.3.x\n",
+ "maintenance/1.1.x\n",
+ "maintenance/1.2.x\n",
+ "maintenance/1.3.x\n",
+ "maintenance/1.4.x\n",
+ "maintenance/1.5.x\n",
+ "maintenance/1.6.x\n",
+ "maintenance/1.7.x\n",
+ "maintenance/1.8.x\n",
+ "maintenance/1.9.x\n",
+ "maintenance/1.10.x\n",
+ "maintenance/1.11.x\n",
+ "maintenance/1.12.x\n",
+ "maintenance/1.13.x\n",
+ "maintenance/1.14.x\n",
+ "maintenance/1.15.x\n",
+ "master\n",
+ "-----------------------------------------------------------------------------------------------\n",
+ "Charles Harris 2018-11-25T22:42:32Z\n",
+ "revert-11693-accept-nep18\n"
+ ]
+ }
+ ],
+ "source": [
+ "for b, bb in enumerate(numpy_branches):\n",
+ " \n",
+ " print bb['name']\n",
+ " \n",
+ " if bb['name'] == 'master':\n",
+ " \n",
+ " print '-----------------------------------------------------------------------------------------------'\n",
+ " \n",
+ " numpy_master_comm_auth = json.loads(requests.get(bb['commit']['url']).text)['commit']['author']['name']\n",
+ " numpy_master_comm_date = json.loads(requests.get(bb['commit']['url']).text)['commit']['author']['date']\n",
+ " \n",
+ " print numpy_master_comm_auth, numpy_master_comm_date"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 2",
+ "language": "python",
+ "name": "python2"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 2
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython2",
+ "version": "2.7.15"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
|