From 8e89677822879ae7c7b16dbf148afa17ca086db6 Mon Sep 17 00:00:00 2001
From: Husni Almoubayyed <husni@physics.org>
Date: Tue, 2 Oct 2018 16:06:29 -0400
Subject: [PATCH 01/11] task-01 completed

---
 task-01/completed.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/task-01/completed.md b/task-01/completed.md
index bca9187..baec4d1 100644
--- a/task-01/completed.md
+++ b/task-01/completed.md
@@ -1,2 +1,2 @@
 ## Those who have completed this task:
-
+hsnee

From e0d2f4caf2cf2fe2647470d669a325546de10020 Mon Sep 17 00:00:00 2001
From: Yao-Yuan Mao <yymao.astro@gmail.com>
Date: Tue, 2 Oct 2018 14:10:25 -0400
Subject: [PATCH 02/11] bug fix

---
 task-03/get_top_names.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/task-03/get_top_names.py b/task-03/get_top_names.py
index b77911c..4535204 100644
--- a/task-03/get_top_names.py
+++ b/task-03/get_top_names.py
@@ -16,7 +16,7 @@ def extract_data_lines(filename, start_text, end_text):
             # use `yield line` to return desired lines but keep the function going
 
 
-if name == '__main__':
+if __name__ == '__main__':
     filename = 'top5names.html'
     start_text = '<tr><td align="center">2017</td>'
     end_text = '</table></center></div><!-- end #content -->'

From 8a160151249a8cccea5f1d6ae8fcd7648ed7f822 Mon Sep 17 00:00:00 2001
From: Yao-Yuan Mao <yymao.astro@gmail.com>
Date: Tue, 2 Oct 2018 19:35:40 -0400
Subject: [PATCH 03/11] add task 4

---
 task-04/README.md | 68 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 task-04/README.md

diff --git a/task-04/README.md b/task-04/README.md
new file mode 100644
index 0000000..1336400
--- /dev/null
+++ b/task-04/README.md
@@ -0,0 +1,68 @@
+# Task 4: [database]  Preparing for data scraping: design a data model for top baby names
+
+## Background
+
+Before we start to scrape the top baby names from the webpage, we need to design
+a data model that we will use to store the data.
+
+The term "data model" has different meanings in different contexts.
+We can ask what kind of object the data will be stored in.
+A python list? A python dictionary? A pandas data frame?
+For a given type, we can further ask how the data is stored.
+For example, if we store the data in a pandas data frame, we can ask what
+are the columns and rows.
+
+Let's look at some examples.
+The original webpage stores the names as a table, with columns being
+`year`, `female_rank1`, `female_rank2`, `male_rank1`, `male_rank2`..., and
+each row corresponds to one single year.
+
+A more extreme example would be storing the names as a sequence (say a python list),
+the content of the sequence will be the names, while the indices of the sequence encode
+year, ranking, and gender altogether. A possible way to encode the information is
+```python
+year = 2017 - index // 10
+rank = index % 5 + 1
+gender = 'female' if index % 10 < 5 else 'male'
+```
+While this data model preserves all the information, it is unlikely that this
+model will be very convenient when it comes to data exploration.
+
+Yet another totally different data model is to group the data by names.
+Let's say we'll store the data in a python dictionary. A possible way is:
+```python
+{
+    'Emma':{
+        'gender': 'female',
+        'years_ranked_1': [2017, 2016, 2015, 2014, ...],
+        'years_ranked_2': [2013, 2012, 2009, ...],
+        'years_ranked_3': [...],
+    },
+    'Noah':{
+        ...,
+    },
+    ...,
+}
+```
+
+Note that the form (object) in which the data is stored and how the data is structured
+are two different things. (*Food for thought: why? Can you give an example?*)
+
+Clearly, the choice of data model heavily depends on the questions that we would
+like to answer with the data.
+If the amount of data is very large, we will also need to consider the available
+computing resources like memory usage and I/O speed when designing the data model.
+For now, we don't yet need to worry about the limitation due to computing resources.
+
+
+## Task
+
+Try to come up with a data model that is good for answering each of the following questions.
+Think about the code you'll need to write to interact with the data model to answer
+these questions.
+
+1. In which years was Emma the most chosen name?
+2. Which name was the most chosen name for the longest run of consecutive years?
+3. How many unique male names have been in the top 5 between years 1980 and 2000?
+4. Are there more unique male names or more unique female names that are in the top 5?
+5. What is the distribution of the numbers of consecutive years that a male name remains the most chosen name?

From 458086869b9d18d5cd9df2d8a164c58bcd0a1e65 Mon Sep 17 00:00:00 2001
From: Yao-Yuan Mao <yymao.astro@gmail.com>
Date: Wed, 3 Oct 2018 14:07:26 -0400
Subject: [PATCH 04/11] add task-01 solution

---
 task-01/solution.md | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 task-01/solution.md

diff --git a/task-01/solution.md b/task-01/solution.md
new file mode 100644
index 0000000..a194ab8
--- /dev/null
+++ b/task-01/solution.md
@@ -0,0 +1,43 @@
+# Solution to Task 1: [git] Fork a repo and submit a pull request
+
+## Steps
+
+1. Fork `astropgh/learning-by-doing` repository
+
+> Click the "fork" button on the upper right corner on GitHub.
+
+2. Clone your fork
+
+```bash
+git clone git@github.com:yourusername/learning-by-doing.git
+```
+
+3. Checkout a new branch called `task/01`
+
+```bash
+cd learning-by-doing
+git checkout -b task/01
+```
+
+4. Add your GitHub username to `task-01/completed.md`
+
+```bash
+echo "yourusername" >> task-01/completed.md
+```
+
+5. Commit your change to `task/01`
+
+```bash
+git add task-01/completed.md
+git commit -m "add my username to complete task 01"
+```
+
+6. Push `task/01` to your fork
+
+```bash
+git push origin task/01
+```
+
+7. Submit a pull request
+
+> Click the "Create pull request" button on GitHub

From b033c2b5288f1c32fb569ee813d3bbb7e2879e79 Mon Sep 17 00:00:00 2001
From: Yao-Yuan Mao <yymao.astro@gmail.com>
Date: Wed, 3 Oct 2018 22:39:39 -0400
Subject: [PATCH 05/11] fix typo

---
 task-02/README.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/task-02/README.md b/task-02/README.md
index 4b418ae..ab38dd1 100644
--- a/task-02/README.md
+++ b/task-02/README.md
@@ -16,8 +16,7 @@
 - https://help.github.com/articles/configuring-a-remote-for-a-fork/
 - https://help.github.com/articles/syncing-a-fork/
 
-## Food for thoughts
+## Food for thought
 - What's the difference between a fork and a clone?
 - What's the difference between `origin` and `upstream` in this case?
 - What's the benefit to work on new branches like `task/01` and `task/02`, rather than on `master` directly?
-

From 633554a795eb51162cf65cd1172c45e0289be15a Mon Sep 17 00:00:00 2001
From: Yao-Yuan Mao <yymao.astro@gmail.com>
Date: Wed, 3 Oct 2018 22:42:52 -0400
Subject: [PATCH 06/11] add task 5

---
 task-05/README.md | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 task-05/README.md

diff --git a/task-05/README.md b/task-05/README.md
new file mode 100644
index 0000000..8c91522
--- /dev/null
+++ b/task-05/README.md
@@ -0,0 +1,27 @@
+# Task 5: [git] Merge and rebase
+
+*prerequisites*: [Task 1](../task-01), [Task 2](../task-02)
+
+We will now learn two basic operations of git branches: merge and rebase.
+As always, you can find lots of information about this on the Internet,
+and here we will go ahead to learn by trying them out.
+
+## Part 1
+Complete Levels 1 through 4 on https://learngitbranching.js.org/
+
+## Part 2
+1. Go back to your clone of `learning-by-doing`. Make sure you've completed Tasks [1](../task-01) and [2](../task-02).
+2. Do **only** Step 2 of [Task 2](../task-02) again.
+3. Now the `master` branch and your `task/01` branch have diverged, and you will rebase `task/01` onto `master`.
+4. Go look at your PR at https://github.com/astropgh/learning-by-doing/pulls. Has it changed? Why?
+
+## Part 3
+*Note: Do Part 2 first!*
+
+1. Checkout a new branch called `task/05` from `master` (*What does this mean?*)
+2. Add a new file `task-05/test` and commit it to `task/05`
+3. Merge `task/05` into `task/01`
+4. Go look at your PR at https://github.com/astropgh/learning-by-doing/pulls. Has it changed? Why?
+
+## Food for thought
+- What's the difference between "rebase" and "merge"?

From fc6cc2ad5db2982c198cb6f3286d7ef315c8f2a1 Mon Sep 17 00:00:00 2001
From: Yao-Yuan Mao <yymao.astro@gmail.com>
Date: Wed, 3 Oct 2018 22:43:00 -0400
Subject: [PATCH 07/11] add task 6

---
 task-06/README.md | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 task-06/README.md

diff --git a/task-06/README.md b/task-06/README.md
new file mode 100644
index 0000000..d86075f
--- /dev/null
+++ b/task-06/README.md
@@ -0,0 +1,9 @@
+# Task 6: [database] Basic SQL
+
+Complete "Basic SQL" Lessons 1 through 6 on https://community.modeanalytics.com/sql/
+
+## Extension
+Complete "Basic SQL" Lessons 7 through 15 on https://community.modeanalytics.com/sql/
+
+## Food for thought
+- After learning the basic SQL operations, would you change your answers to [Task 4](../task-04)?

From 36658e5d0eafbd28d2d72d37739e89fcfcf294a8 Mon Sep 17 00:00:00 2001
From: Husni Almoubayyed <husni@physics.org>
Date: Sun, 7 Oct 2018 15:16:58 -0400
Subject: [PATCH 08/11] completing get_top_names generator

---
 task-03/get_top_names.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/task-03/get_top_names.py b/task-03/get_top_names.py
index 4535204..f7c4c2c 100644
--- a/task-03/get_top_names.py
+++ b/task-03/get_top_names.py
@@ -9,12 +9,24 @@ def extract_data_lines(filename, start_text, end_text):
     open `filename`, and yield the lines between
     the line that contains `start_text` and the line that contains `end_text`
     """
-    # fill in code as needed
+    turn_on = False
     with open(filename) as fh:
-        for line in fh:
-            # fill in code as needed
-            # use `yield line` to return desired lines but keep the function going
+        for i,line in enumerate(fh):
+            if turn_on=='done': break
 
+            if end_text in line:
+                if include_end:
+                    turn_on = 'done'
+                    yield line
+                break
+
+            if turn_on: yield line
+
+            if start_text in line:
+                if include_start:
+                    turn_on = True
+                    yield line
+                turn_on = True
 
 if __name__ == '__main__':
     filename = 'top5names.html'

From 36d4b885a8918b2ebc11c3a240daa3f53675a263 Mon Sep 17 00:00:00 2001
From: Husni Almoubayyed <h.almoubayyed@gmail.com>
Date: Tue, 9 Oct 2018 17:12:37 -0400
Subject: [PATCH 09/11] didn't need to enumerate

---
 task-03/get_top_names.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/task-03/get_top_names.py b/task-03/get_top_names.py
index f7c4c2c..54aaf9a 100644
--- a/task-03/get_top_names.py
+++ b/task-03/get_top_names.py
@@ -11,21 +11,17 @@ def extract_data_lines(filename, start_text, end_text):
     """
     turn_on = False
     with open(filename) as fh:
-        for i,line in enumerate(fh):
+        for line in fh:
             if turn_on=='done': break
 
             if end_text in line:
-                if include_end:
-                    turn_on = 'done'
-                    yield line
+                if include_end: turn_on = 'done'; yield line
                 break
 
             if turn_on: yield line
 
             if start_text in line:
-                if include_start:
-                    turn_on = True
-                    yield line
+                if include_start: turn_on = True; yield line
                 turn_on = True
 
 if __name__ == '__main__':

From 679ee5adea6058892f8bf86a6c133fcc9fec7f9f Mon Sep 17 00:00:00 2001
From: Husni Almoubayyed <h.almoubayyed@gmail.com>
Date: Tue, 9 Oct 2018 17:13:54 -0400
Subject: [PATCH 10/11] slightly shorter

---
 task-03/get_top_names.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/task-03/get_top_names.py b/task-03/get_top_names.py
index 54aaf9a..dc59efb 100644
--- a/task-03/get_top_names.py
+++ b/task-03/get_top_names.py
@@ -21,8 +21,9 @@ def extract_data_lines(filename, start_text, end_text):
             if turn_on: yield line
 
             if start_text in line:
-                if include_start: turn_on = True; yield line
                 turn_on = True
+                if include_start: yield line
+                
 
 if __name__ == '__main__':
     filename = 'top5names.html'

From 2f7a0229d589b261e606b6410e96bd0fbd4b2d59 Mon Sep 17 00:00:00 2001
From: Husni Almoubayyed <h.almoubayyed@gmail.com>
Date: Tue, 9 Oct 2018 17:15:48 -0400
Subject: [PATCH 11/11] slightly shorterer

---
 task-03/get_top_names.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/task-03/get_top_names.py b/task-03/get_top_names.py
index dc59efb..85d915a 100644
--- a/task-03/get_top_names.py
+++ b/task-03/get_top_names.py
@@ -15,8 +15,8 @@ def extract_data_lines(filename, start_text, end_text):
             if turn_on=='done': break
 
             if end_text in line:
-                if include_end: turn_on = 'done'; yield line
-                break
+                turn_on = 'done'
+                if include_end: yield line
 
             if turn_on: yield line