From fe9bbfac77952175e34f23fbb17de244175132d7 Mon Sep 17 00:00:00 2001
From: oussamanaji
Date: Fri, 3 Apr 2020 23:33:15 +0300
Subject: [PATCH 01/12] France COVID19 Granular Data Extraction

---
 .../covid/fr_covidata/__init__.py             |   3 +
 .../covid/fr_covidata/__main__.py             |  24 +++
 .../covid/fr_covidata/fr_covidata.py          | 185 ++++++++++++++++++
 3 files changed, 212 insertions(+)
 create mode 100644 task_geo/data_sources/covid/fr_covidata/__init__.py
 create mode 100644 task_geo/data_sources/covid/fr_covidata/__main__.py
 create mode 100644 task_geo/data_sources/covid/fr_covidata/fr_covidata.py

diff --git a/task_geo/data_sources/covid/fr_covidata/__init__.py b/task_geo/data_sources/covid/fr_covidata/__init__.py
new file mode 100644
index 0000000..7c7bae7
--- /dev/null
+++ b/task_geo/data_sources/covid/fr_covidata/__init__.py
@@ -0,0 +1,3 @@
+from task_geo.data_sources.covid.fr_covidata.fr_covidata import fr_covidata
+
+__all__ = ['fr_covidata']
\ No newline at end of file
diff --git a/task_geo/data_sources/covid/fr_covidata/__main__.py b/task_geo/data_sources/covid/fr_covidata/__main__.py
new file mode 100644
index 0000000..101d3b8
--- /dev/null
+++ b/task_geo/data_sources/covid/fr_covidata/__main__.py
@@ -0,0 +1,24 @@
+import argparse
+
+from task_geo.data_sources.covid.fr_covidata.fr_covidata import fr_covidata
+
+
+def get_argparser():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        '-o', '--output', required=True,
+        help='Destination file to store the processed dataset.')
+    return parser
+
+
+def main():
+    parser = get_argparser()
+    args = parser.parse_args()
+
+    dataset = fr_covidata()
+    dataset.to_csv(args.output, index=False, header=True)
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/task_geo/data_sources/covid/fr_covidata/fr_covidata.py b/task_geo/data_sources/covid/fr_covidata/fr_covidata.py
new file mode 100644
index 0000000..a40d2fe
--- /dev/null
+++ b/task_geo/data_sources/covid/fr_covidata/fr_covidata.py
@@ -0,0 +1,185 @@
+"""
+fr_covidata.py
+
+Functions:
+    - fr_covidata_connector: Extracts data from CSV URL
+    - fr_covidata_formatter: Cleans CSV data
+    - fr_covidata: Combines the two previous functions
+
+Data Credits:
+    OpenCOVID19-fr
+    https://www.data.gouv.fr/en/datasets/chiffres-cles-concernant-lepidemie-de-covid19-en-france/
+    https://github.com/opencovid19-fr/data
+"""
+
+import requests
+import io
+
+import pandas as pd
+
+url = 'https://raw.githubusercontent.com/opencovid19-fr/data/master/dist/chiffres-cles.csv'
+
+
+def fr_covidata():
+    """Data Source for the French COVID-19 Data.
+    Arguments:
+        None
+    Returns:
+        pandas.DataFrame
+    """
+    df = fr_covidata_connector()
+    return fr_covidata_formatter(df)
+
+
+def fr_covidata_connector():
+    """Extract data from OpenCOVID19-fr's Github repository.
+    Description:
+        - Downloads the CSV export from the OpenCOVID19-fr repository
+        - Decodes the raw bytes as UTF-8 before parsing
+    Returns:
+        dataset (DataFrame with CSV Data)
+    """
+
+    urlData = requests.get(url).content
+
+    dataset = pd.read_csv(io.StringIO(urlData.decode('utf-8')))
+    return dataset
+
+
+def fr_covidata_formatter(dataset):
+    """Formatter for FR COVID-19 Data.
+    Arguments:
+        dataset(pandas.DataFrame): Data as returned by fr_covidata_connector.
+ Description: + - Drop unnecessary rows with irrelevant regions' info and only keep info + related to subregions in Metropolitan France, as well as repetitive data + - Check the dataset for instances where there are more than one source + of data in the same subregion for the same date, then complement all the + sources information, and take the highest value in case there are + different values for the same column, while aggregating the sources info + - Rename/Translate the column titles, and add a country column (France) + Returns: + frcovidata(pandas.DataFrame) + """ + + dataset = dataset[dataset.granularite != 'region'] + dataset = dataset[dataset.granularite != 'monde'] + dataset = dataset[dataset.granularite != 'pays'] + dataset = dataset[dataset.granularite != 'collectivite-outremer'] + dataset = dataset[dataset.maille_code != 'DEP-971'] + dataset = dataset[dataset.maille_code != 'DEP-972'] + dataset = dataset[dataset.maille_code != 'DEP-973'] + dataset = dataset[dataset.maille_code != 'DEP-974'] + dataset = dataset[dataset.maille_code != 'DEP-976'] + dataset = dataset.drop(['depistes'], axis=1) + dataset = dataset.drop(['granularite'], axis=1) + dataset = dataset.drop_duplicates(subset=['date', 'maille_code', 'cas_confirmes', 'deces', 'reanimation', 'hospitalises', 'gueris'], keep=False) + + #Reset indices: + dataset = dataset.reset_index() + dataset = dataset.drop(['index'], axis=1) + + #Turn source columns' values type to string: + dataset['source_nom'] = dataset['source_nom'].astype(str) + dataset['source_url'] = dataset['source_url'].astype(str) + dataset['source_archive'] = dataset['source_archive'].astype(str) + dataset['source_type'] = dataset['source_type'].astype(str) + + + for i in range(len(dataset)-1): + if dataset.loc[i,'maille_code'] == dataset.loc[i+1,'maille_code'] and dataset.loc[i,'date'] == dataset.loc[i+1,'date'] and dataset.loc[i,'cas_confirmes'] != 'inv' and i != len(dataset): + #Combine Source names, url, archive and type for repetitive subregions at the same date: + dataset.loc[i,'source_nom'] = dataset.loc[i,'source_nom'] + dataset.loc[i+1,'source_nom'] + dataset.loc[i,'source_url'] = dataset.loc[i,'source_url'] + dataset.loc[i+1,'source_url'] + dataset.loc[i,'source_archive'] = dataset.loc[i,'source_archive'] + dataset.loc[i+1,'source_archive'] + dataset.loc[i,'source_type'] = dataset.loc[i,'source_type'] + dataset.loc[i+1,'source_type'] + if pd.isnull(dataset.loc[i,'cas_confirmes']) == True and pd.isnull(dataset.loc[i+1,'cas_confirmes']) == False: + dataset.loc[i,'cas_confirmes'] = dataset.loc[i+1,'cas_confirmes'] + dataset.loc[i+1,'cas_confirmes'] = 'inv' + elif pd.isnull(dataset.loc[i,'cas_confirmes']) == False and pd.isnull(dataset.loc[i+1,'cas_confirmes']) == True: + dataset.loc[i+1,'cas_confirmes'] = 'inv' + elif pd.isnull(dataset.loc[i,'cas_confirmes']) == True and pd.isnull(dataset.loc[i+1,'cas_confirmes']) == True: + dataset.loc[i+1,'cas_confirmes'] = 'inv' + elif dataset.loc[i,'cas_confirmes'] == dataset.loc[i+1,'cas_confirmes']: + dataset.loc[i+1,'cas_confirmes'] = 'inv' + elif dataset.loc[i,'cas_confirmes'] > dataset.loc[i+1,'cas_confirmes']: + dataset.loc[i+1,'cas_confirmes'] = 'inv' + elif dataset.loc[i,'cas_confirmes'] < dataset.loc[i+1,'cas_confirmes']: + dataset.loc[i,'cas_confirmes'] = dataset.loc[i+1,'cas_confirmes'] + dataset.loc[i+1,'cas_confirmes'] = 'inv' + + if dataset.loc[i,'maille_code'] == dataset.loc[i+1,'maille_code'] and dataset.loc[i,'date'] == dataset.loc[i+1,'date'] and dataset.loc[i,'deces'] != 'inv': + if 
pd.isnull(dataset.loc[i,'deces']) == True and pd.isnull(dataset.loc[i+1,'deces']) == False: + dataset.loc[i,'deces'] = dataset.loc[i+1,'deces'] + dataset.loc[i+1,'deces'] = 'inv' + elif pd.isnull(dataset.loc[i,'deces']) == False and pd.isnull(dataset.loc[i+1,'deces']) == True: + dataset.loc[i+1,'deces'] = 'inv' + elif pd.isnull(dataset.loc[i,'deces']) == True and pd.isnull(dataset.loc[i+1,'deces']) == True: + dataset.loc[i+1,'deces'] = 'inv' + elif dataset.loc[i,'deces'] == dataset.loc[i+1,'deces']: + dataset.loc[i+1,'deces'] = 'inv' + elif dataset.loc[i,'deces'] > dataset.loc[i+1,'deces']: + dataset.loc[i+1,'deces'] = 'inv' + elif dataset.loc[i,'deces'] < dataset.loc[i+1,'deces']: + dataset.loc[i,'deces'] = dataset.loc[i+1,'deces'] + dataset.loc[i+1,'deces'] = 'inv' + + if dataset.loc[i,'maille_code'] == dataset.loc[i+1,'maille_code'] and dataset.loc[i,'date'] == dataset.loc[i+1,'date'] and dataset.loc[i,'reanimation'] != 'inv': + if pd.isnull(dataset.loc[i,'reanimation']) == True and pd.isnull(dataset.loc[i+1,'reanimation']) == False: + dataset.loc[i,'reanimation'] = dataset.loc[i+1,'reanimation'] + dataset.loc[i+1,'reanimation'] = 'inv' + elif pd.isnull(dataset.loc[i,'reanimation']) == False and pd.isnull(dataset.loc[i+1,'reanimation']) == True: + dataset.loc[i+1,'reanimation'] = 'inv' + elif pd.isnull(dataset.loc[i,'reanimation']) == True and pd.isnull(dataset.loc[i+1,'reanimation']) == True: + dataset.loc[i+1,'reanimation'] = 'inv' + elif dataset.loc[i,'reanimation'] == dataset.loc[i+1,'reanimation']: + dataset.loc[i+1,'reanimation'] = 'inv' + elif dataset.loc[i,'reanimation'] > dataset.loc[i+1,'reanimation']: + dataset.loc[i+1,'reanimation'] = 'inv' + elif dataset.loc[i,'reanimation'] < dataset.loc[i+1,'reanimation']: + dataset.loc[i,'reanimation'] = dataset.loc[i+1,'reanimation'] + dataset.loc[i+1,'reanimation'] = 'inv' + + if dataset.loc[i,'maille_code'] == dataset.loc[i+1,'maille_code'] and dataset.loc[i,'date'] == dataset.loc[i+1,'date'] and dataset.loc[i,'hospitalises'] != 'inv': + if pd.isnull(dataset.loc[i,'hospitalises']) == True and pd.isnull(dataset.loc[i+1,'hospitalises']) == False: + dataset.loc[i,'hospitalises'] = dataset.loc[i+1,'hospitalises'] + dataset.loc[i+1,'hospitalises'] = 'inv' + elif pd.isnull(dataset.loc[i,'hospitalises']) == False and pd.isnull(dataset.loc[i+1,'hospitalises']) == True: + dataset.loc[i+1,'hospitalises'] = 'inv' + elif pd.isnull(dataset.loc[i,'hospitalises']) == True and pd.isnull(dataset.loc[i+1,'hospitalises']) == True: + dataset.loc[i+1,'hospitalises'] = 'inv' + elif dataset.loc[i,'hospitalises'] == dataset.loc[i+1,'hospitalises']: + dataset.loc[i+1,'hospitalises'] = 'inv' + elif dataset.loc[i,'hospitalises'] > dataset.loc[i+1,'hospitalises']: + dataset.loc[i+1,'hospitalises'] = 'inv' + elif dataset.loc[i,'hospitalises'] < dataset.loc[i+1,'hospitalises']: + dataset.loc[i,'hospitalises'] = dataset.loc[i+1,'hospitalises'] + dataset.loc[i+1,'hospitalises'] = 'inv' + + if dataset.loc[i,'maille_code'] == dataset.loc[i+1,'maille_code'] and dataset.loc[i,'date'] == dataset.loc[i+1,'date'] and dataset.loc[i,'gueris'] != 'inv': + if pd.isnull(dataset.loc[i,'gueris']) == True and pd.isnull(dataset.loc[i+1,'gueris']) == False: + dataset.loc[i,'gueris'] = dataset.loc[i+1,'gueris'] + dataset.loc[i+1,'gueris'] = 'inv' + elif pd.isnull(dataset.loc[i,'gueris']) == False and pd.isnull(dataset.loc[i+1,'gueris']) == True: + dataset.loc[i+1,'gueris'] = 'inv' + elif pd.isnull(dataset.loc[i,'gueris']) == True and pd.isnull(dataset.loc[i+1,'gueris']) == True: + 
dataset.loc[i+1,'gueris'] = 'inv' + elif dataset.loc[i,'gueris'] == dataset.loc[i+1,'gueris']: + dataset.loc[i+1,'gueris'] = 'inv' + elif dataset.loc[i,'gueris'] > dataset.loc[i+1,'gueris']: + dataset.loc[i+1,'gueris'] = 'inv' + elif dataset.loc[i,'gueris'] < dataset.loc[i+1,'gueris']: + dataset.loc[i,'gueris'] = dataset.loc[i+1,'gueris'] + dataset.loc[i+1,'gueris'] = 'inv' + + #Delete the redundant resulting rows and reset the indices: + dataset = dataset[dataset.cas_confirmes != 'inv'] + dataset = dataset.reset_index() + dataset = dataset.drop(['index'], axis=1) + + #Rename/Translate the column titles: + dataset = dataset.rename(columns={"maille_code":"subregion_code", "maille_nom":"subregion_name", "cas_confirmes": "confirmed", "deces": "deaths", "reanimation": "recovering", "hospitalises": "hospitalized", "gueris": "recovered", "source_nom": "source_name"}) + dataset['country'] = 'France' + frcovidata = dataset[['subregion_code', 'subregion_name', 'country', 'date', 'confirmed', 'hospitalized', 'recovering', 'recovered', 'deaths', 'source_name', 'source_url', 'source_archive', 'source_type']] + + return frcovidata \ No newline at end of file From 533df325a001b6b6d52bc7b07110b0283d467f67 Mon Sep 17 00:00:00 2001 From: oussamanaji Date: Sun, 5 Apr 2020 16:22:38 +0300 Subject: [PATCH 02/12] Update __init__.py --- task_geo/data_sources/covid/fr_covidata/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/task_geo/data_sources/covid/fr_covidata/__init__.py b/task_geo/data_sources/covid/fr_covidata/__init__.py index 7c7bae7..80d4d99 100644 --- a/task_geo/data_sources/covid/fr_covidata/__init__.py +++ b/task_geo/data_sources/covid/fr_covidata/__init__.py @@ -1,3 +1,3 @@ from task_geo.data_sources.covid.fr_covidata.fr_covidata import fr_covidata -__all__ = ['fr_covidata'] \ No newline at end of file +__all__ = ['fr_covidata'] From 459b44a941431b6eb0f67a8a1961b247bcf13cc0 Mon Sep 17 00:00:00 2001 From: oussamanaji Date: Sun, 5 Apr 2020 16:24:27 +0300 Subject: [PATCH 03/12] Update __main__.py --- task_geo/data_sources/covid/fr_covidata/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/task_geo/data_sources/covid/fr_covidata/__main__.py b/task_geo/data_sources/covid/fr_covidata/__main__.py index 101d3b8..116d7f6 100644 --- a/task_geo/data_sources/covid/fr_covidata/__main__.py +++ b/task_geo/data_sources/covid/fr_covidata/__main__.py @@ -21,4 +21,4 @@ def main(): if __name__ == '__main__': - main() \ No newline at end of file + main() From f4fc5bc39a962d491ecb11e8fa08fbd45d2df889 Mon Sep 17 00:00:00 2001 From: oussamanaji Date: Sun, 5 Apr 2020 16:26:59 +0300 Subject: [PATCH 04/12] Update fr_covidata.py --- .../covid/fr_covidata/fr_covidata.py | 276 +++++++++++------- 1 file changed, 174 insertions(+), 102 deletions(-) diff --git a/task_geo/data_sources/covid/fr_covidata/fr_covidata.py b/task_geo/data_sources/covid/fr_covidata/fr_covidata.py index a40d2fe..aaa0c0b 100644 --- a/task_geo/data_sources/covid/fr_covidata/fr_covidata.py +++ b/task_geo/data_sources/covid/fr_covidata/fr_covidata.py @@ -12,12 +12,15 @@ https://github.com/opencovid19-fr/data """ -import requests import io import pandas as pd +import requests -url = 'https://raw.githubusercontent.com/opencovid19-fr/data/master/dist/chiffres-cles.csv' +url = ( + 'https://raw.githubusercontent.com/opencovid19-fr/' + 'data/master/dist/chiffres-cles.csv' + ) def fr_covidata(): @@ -39,7 +42,7 @@ def fr_covidata_connector(): Returns: dataset (DataFrame with CSV Data) """ - + urlData = 
requests.get(url).content dataset = pd.read_csv(io.StringIO(urlData.decode('utf-8'))) @@ -51,12 +54,14 @@ def fr_covidata_formatter(dataset): Arguments: dataset(pandas.DataFrame): Data as returned by fr_covidata_connector. Description: - - Drop unnecessary rows with irrelevant regions' info and only keep info - related to subregions in Metropolitan France, as well as repetitive data + - Drop unnecessary rows with irrelevant regions' info and only keep + info related to subregions in Metropolitan France, as well as + repetitive data - Check the dataset for instances where there are more than one source - of data in the same subregion for the same date, then complement all the - sources information, and take the highest value in case there are - different values for the same column, while aggregating the sources info + of data in the same subregion for the same date, then complement all + the sources information, and take the highest value in case there are + different values for the same column, while aggregating the sources + info - Rename/Translate the column titles, and add a country column (France) Returns: frcovidata(pandas.DataFrame) @@ -73,113 +78,180 @@ def fr_covidata_formatter(dataset): dataset = dataset[dataset.maille_code != 'DEP-976'] dataset = dataset.drop(['depistes'], axis=1) dataset = dataset.drop(['granularite'], axis=1) - dataset = dataset.drop_duplicates(subset=['date', 'maille_code', 'cas_confirmes', 'deces', 'reanimation', 'hospitalises', 'gueris'], keep=False) + dataset = dataset.drop_duplicates( + subset=['date', 'maille_code', 'cas_confirmes', 'deces', + 'reanimation', + 'hospitalises', 'gueris'], keep=False) - #Reset indices: + # Reset indices: dataset = dataset.reset_index() dataset = dataset.drop(['index'], axis=1) - #Turn source columns' values type to string: + # Turn source columns' values type to string: dataset['source_nom'] = dataset['source_nom'].astype(str) dataset['source_url'] = dataset['source_url'].astype(str) dataset['source_archive'] = dataset['source_archive'].astype(str) dataset['source_type'] = dataset['source_type'].astype(str) + for i in range(len(dataset) - 1): + if dataset.loc[i, 'maille_code'] == dataset.loc[ + i + 1, 'maille_code'] and dataset.loc[i, 'date'] == dataset.loc[ + i + 1, 'date'] and dataset.loc[ + i, 'cas_confirmes'] != 'inv' and i != len(dataset): + # Combine Source names, url, archive and type for repetitive + # subregions at the same date: + dataset.loc[i, 'source_nom'] = dataset.loc[i, 'source_nom'] + \ + dataset.loc[i + 1, 'source_nom'] + dataset.loc[i, 'source_url'] = dataset.loc[i, 'source_url'] + \ + dataset.loc[i + 1, 'source_url'] + dataset.loc[i, 'source_archive'] = dataset.loc[ + i, 'source_archive'] + \ + dataset.loc[ + i + 1, 'source_archive'] + dataset.loc[i, 'source_type'] = dataset.loc[i, 'source_type'] + \ + dataset.loc[i + 1, 'source_type'] + if pd.isnull( + dataset.loc[i, 'cas_confirmes']) is True and pd.isnull( + dataset.loc[i + 1, 'cas_confirmes']) is False: + dataset.loc[i, 'cas_confirmes'] = dataset.loc[ + i + 1, 'cas_confirmes'] + dataset.loc[i + 1, 'cas_confirmes'] = 'inv' + elif pd.isnull( + dataset.loc[i, 'cas_confirmes']) is False and pd.isnull( + dataset.loc[i + 1, 'cas_confirmes']) is True: + dataset.loc[i + 1, 'cas_confirmes'] = 'inv' + elif pd.isnull( + dataset.loc[i, 'cas_confirmes']) is True and pd.isnull( + dataset.loc[i + 1, 'cas_confirmes']) is True: + dataset.loc[i + 1, 'cas_confirmes'] = 'inv' + elif dataset.loc[i, 'cas_confirmes'] == dataset.loc[ + i + 1, 'cas_confirmes']: + dataset.loc[i 
+ 1, 'cas_confirmes'] = 'inv' + elif dataset.loc[i, 'cas_confirmes'] > dataset.loc[ + i + 1, 'cas_confirmes']: + dataset.loc[i + 1, 'cas_confirmes'] = 'inv' + elif dataset.loc[i, 'cas_confirmes'] < dataset.loc[ + i + 1, 'cas_confirmes']: + dataset.loc[i, 'cas_confirmes'] = dataset.loc[ + i + 1, 'cas_confirmes'] + dataset.loc[i + 1, 'cas_confirmes'] = 'inv' + + if dataset.loc[i, 'maille_code'] == dataset.loc[ + i + 1, 'maille_code'] and dataset.loc[i, 'date'] == dataset.loc[ + i + 1, 'date'] and dataset.loc[i, 'deces'] != 'inv': + if pd.isnull(dataset.loc[i, 'deces']) is True and pd.isnull( + dataset.loc[i + 1, 'deces']) is False: + dataset.loc[i, 'deces'] = dataset.loc[i + 1, 'deces'] + dataset.loc[i + 1, 'deces'] = 'inv' + elif pd.isnull(dataset.loc[i, 'deces']) is False and pd.isnull( + dataset.loc[i + 1, 'deces']) is True: + dataset.loc[i + 1, 'deces'] = 'inv' + elif pd.isnull(dataset.loc[i, 'deces']) is True and pd.isnull( + dataset.loc[i + 1, 'deces']) is True: + dataset.loc[i + 1, 'deces'] = 'inv' + elif dataset.loc[i, 'deces'] == dataset.loc[i + 1, 'deces']: + dataset.loc[i + 1, 'deces'] = 'inv' + elif dataset.loc[i, 'deces'] > dataset.loc[i + 1, 'deces']: + dataset.loc[i + 1, 'deces'] = 'inv' + elif dataset.loc[i, 'deces'] < dataset.loc[i + 1, 'deces']: + dataset.loc[i, 'deces'] = dataset.loc[i + 1, 'deces'] + dataset.loc[i + 1, 'deces'] = 'inv' + + if dataset.loc[i, 'maille_code'] == dataset.loc[ + i + 1, 'maille_code'] and dataset.loc[i, 'date'] == dataset.loc[ + i + 1, 'date'] and dataset.loc[i, 'reanimation'] != 'inv': + if pd.isnull(dataset.loc[i, 'reanimation']) is True and pd.isnull( + dataset.loc[i + 1, 'reanimation']) is False: + dataset.loc[i, 'reanimation'] = dataset.loc[ + i + 1, 'reanimation'] + dataset.loc[i + 1, 'reanimation'] = 'inv' + elif pd.isnull( + dataset.loc[i, 'reanimation']) is False and pd.isnull( + dataset.loc[i + 1, 'reanimation']) is True: + dataset.loc[i + 1, 'reanimation'] = 'inv' + elif pd.isnull( + dataset.loc[i, 'reanimation']) is True and pd.isnull( + dataset.loc[i + 1, 'reanimation']) is True: + dataset.loc[i + 1, 'reanimation'] = 'inv' + elif dataset.loc[i, 'reanimation'] == dataset.loc[ + i + 1, 'reanimation']: + dataset.loc[i + 1, 'reanimation'] = 'inv' + elif dataset.loc[i, 'reanimation'] > dataset.loc[ + i + 1, 'reanimation']: + dataset.loc[i + 1, 'reanimation'] = 'inv' + elif dataset.loc[i, 'reanimation'] < dataset.loc[ + i + 1, 'reanimation']: + dataset.loc[i, 'reanimation'] = dataset.loc[ + i + 1, 'reanimation'] + dataset.loc[i + 1, 'reanimation'] = 'inv' + + if dataset.loc[i, 'maille_code'] == dataset.loc[ + i + 1, 'maille_code'] and dataset.loc[i, 'date'] == dataset.loc[ + i + 1, 'date'] and dataset.loc[i, 'hospitalises'] != 'inv': + if pd.isnull( + dataset.loc[i, 'hospitalises']) is True and pd.isnull( + dataset.loc[i + 1, 'hospitalises']) is False: + dataset.loc[i, 'hospitalises'] = dataset.loc[ + i + 1, 'hospitalises'] + dataset.loc[i + 1, 'hospitalises'] = 'inv' + elif pd.isnull( + dataset.loc[i, 'hospitalises']) is False and pd.isnull( + dataset.loc[i + 1, 'hospitalises']) is True: + dataset.loc[i + 1, 'hospitalises'] = 'inv' + elif pd.isnull( + dataset.loc[i, 'hospitalises']) is True and pd.isnull( + dataset.loc[i + 1, 'hospitalises']) is True: + dataset.loc[i + 1, 'hospitalises'] = 'inv' + elif dataset.loc[i, 'hospitalises'] == dataset.loc[ + i + 1, 'hospitalises']: + dataset.loc[i + 1, 'hospitalises'] = 'inv' + elif dataset.loc[i, 'hospitalises'] > dataset.loc[ + i + 1, 'hospitalises']: + dataset.loc[i + 1, 'hospitalises'] = 
'inv' + elif dataset.loc[i, 'hospitalises'] < dataset.loc[ + i + 1, 'hospitalises']: + dataset.loc[i, 'hospitalises'] = dataset.loc[ + i + 1, 'hospitalises'] + dataset.loc[i + 1, 'hospitalises'] = 'inv' + + if dataset.loc[i, 'maille_code'] == dataset.loc[ + i + 1, 'maille_code'] and dataset.loc[i, 'date'] == dataset.loc[ + i + 1, 'date'] and dataset.loc[i, 'gueris'] != 'inv': + if pd.isnull(dataset.loc[i, 'gueris']) is True and pd.isnull( + dataset.loc[i + 1, 'gueris']) is False: + dataset.loc[i, 'gueris'] = dataset.loc[i + 1, 'gueris'] + dataset.loc[i + 1, 'gueris'] = 'inv' + elif pd.isnull(dataset.loc[i, 'gueris']) is False and pd.isnull( + dataset.loc[i + 1, 'gueris']) is True: + dataset.loc[i + 1, 'gueris'] = 'inv' + elif pd.isnull(dataset.loc[i, 'gueris']) is True and pd.isnull( + dataset.loc[i + 1, 'gueris']) is True: + dataset.loc[i + 1, 'gueris'] = 'inv' + elif dataset.loc[i, 'gueris'] == dataset.loc[i + 1, 'gueris']: + dataset.loc[i + 1, 'gueris'] = 'inv' + elif dataset.loc[i, 'gueris'] > dataset.loc[i + 1, 'gueris']: + dataset.loc[i + 1, 'gueris'] = 'inv' + elif dataset.loc[i, 'gueris'] < dataset.loc[i + 1, 'gueris']: + dataset.loc[i, 'gueris'] = dataset.loc[i + 1, 'gueris'] + dataset.loc[i + 1, 'gueris'] = 'inv' - for i in range(len(dataset)-1): - if dataset.loc[i,'maille_code'] == dataset.loc[i+1,'maille_code'] and dataset.loc[i,'date'] == dataset.loc[i+1,'date'] and dataset.loc[i,'cas_confirmes'] != 'inv' and i != len(dataset): - #Combine Source names, url, archive and type for repetitive subregions at the same date: - dataset.loc[i,'source_nom'] = dataset.loc[i,'source_nom'] + dataset.loc[i+1,'source_nom'] - dataset.loc[i,'source_url'] = dataset.loc[i,'source_url'] + dataset.loc[i+1,'source_url'] - dataset.loc[i,'source_archive'] = dataset.loc[i,'source_archive'] + dataset.loc[i+1,'source_archive'] - dataset.loc[i,'source_type'] = dataset.loc[i,'source_type'] + dataset.loc[i+1,'source_type'] - if pd.isnull(dataset.loc[i,'cas_confirmes']) == True and pd.isnull(dataset.loc[i+1,'cas_confirmes']) == False: - dataset.loc[i,'cas_confirmes'] = dataset.loc[i+1,'cas_confirmes'] - dataset.loc[i+1,'cas_confirmes'] = 'inv' - elif pd.isnull(dataset.loc[i,'cas_confirmes']) == False and pd.isnull(dataset.loc[i+1,'cas_confirmes']) == True: - dataset.loc[i+1,'cas_confirmes'] = 'inv' - elif pd.isnull(dataset.loc[i,'cas_confirmes']) == True and pd.isnull(dataset.loc[i+1,'cas_confirmes']) == True: - dataset.loc[i+1,'cas_confirmes'] = 'inv' - elif dataset.loc[i,'cas_confirmes'] == dataset.loc[i+1,'cas_confirmes']: - dataset.loc[i+1,'cas_confirmes'] = 'inv' - elif dataset.loc[i,'cas_confirmes'] > dataset.loc[i+1,'cas_confirmes']: - dataset.loc[i+1,'cas_confirmes'] = 'inv' - elif dataset.loc[i,'cas_confirmes'] < dataset.loc[i+1,'cas_confirmes']: - dataset.loc[i,'cas_confirmes'] = dataset.loc[i+1,'cas_confirmes'] - dataset.loc[i+1,'cas_confirmes'] = 'inv' - - if dataset.loc[i,'maille_code'] == dataset.loc[i+1,'maille_code'] and dataset.loc[i,'date'] == dataset.loc[i+1,'date'] and dataset.loc[i,'deces'] != 'inv': - if pd.isnull(dataset.loc[i,'deces']) == True and pd.isnull(dataset.loc[i+1,'deces']) == False: - dataset.loc[i,'deces'] = dataset.loc[i+1,'deces'] - dataset.loc[i+1,'deces'] = 'inv' - elif pd.isnull(dataset.loc[i,'deces']) == False and pd.isnull(dataset.loc[i+1,'deces']) == True: - dataset.loc[i+1,'deces'] = 'inv' - elif pd.isnull(dataset.loc[i,'deces']) == True and pd.isnull(dataset.loc[i+1,'deces']) == True: - dataset.loc[i+1,'deces'] = 'inv' - elif dataset.loc[i,'deces'] == 
dataset.loc[i+1,'deces']: - dataset.loc[i+1,'deces'] = 'inv' - elif dataset.loc[i,'deces'] > dataset.loc[i+1,'deces']: - dataset.loc[i+1,'deces'] = 'inv' - elif dataset.loc[i,'deces'] < dataset.loc[i+1,'deces']: - dataset.loc[i,'deces'] = dataset.loc[i+1,'deces'] - dataset.loc[i+1,'deces'] = 'inv' - - if dataset.loc[i,'maille_code'] == dataset.loc[i+1,'maille_code'] and dataset.loc[i,'date'] == dataset.loc[i+1,'date'] and dataset.loc[i,'reanimation'] != 'inv': - if pd.isnull(dataset.loc[i,'reanimation']) == True and pd.isnull(dataset.loc[i+1,'reanimation']) == False: - dataset.loc[i,'reanimation'] = dataset.loc[i+1,'reanimation'] - dataset.loc[i+1,'reanimation'] = 'inv' - elif pd.isnull(dataset.loc[i,'reanimation']) == False and pd.isnull(dataset.loc[i+1,'reanimation']) == True: - dataset.loc[i+1,'reanimation'] = 'inv' - elif pd.isnull(dataset.loc[i,'reanimation']) == True and pd.isnull(dataset.loc[i+1,'reanimation']) == True: - dataset.loc[i+1,'reanimation'] = 'inv' - elif dataset.loc[i,'reanimation'] == dataset.loc[i+1,'reanimation']: - dataset.loc[i+1,'reanimation'] = 'inv' - elif dataset.loc[i,'reanimation'] > dataset.loc[i+1,'reanimation']: - dataset.loc[i+1,'reanimation'] = 'inv' - elif dataset.loc[i,'reanimation'] < dataset.loc[i+1,'reanimation']: - dataset.loc[i,'reanimation'] = dataset.loc[i+1,'reanimation'] - dataset.loc[i+1,'reanimation'] = 'inv' - - if dataset.loc[i,'maille_code'] == dataset.loc[i+1,'maille_code'] and dataset.loc[i,'date'] == dataset.loc[i+1,'date'] and dataset.loc[i,'hospitalises'] != 'inv': - if pd.isnull(dataset.loc[i,'hospitalises']) == True and pd.isnull(dataset.loc[i+1,'hospitalises']) == False: - dataset.loc[i,'hospitalises'] = dataset.loc[i+1,'hospitalises'] - dataset.loc[i+1,'hospitalises'] = 'inv' - elif pd.isnull(dataset.loc[i,'hospitalises']) == False and pd.isnull(dataset.loc[i+1,'hospitalises']) == True: - dataset.loc[i+1,'hospitalises'] = 'inv' - elif pd.isnull(dataset.loc[i,'hospitalises']) == True and pd.isnull(dataset.loc[i+1,'hospitalises']) == True: - dataset.loc[i+1,'hospitalises'] = 'inv' - elif dataset.loc[i,'hospitalises'] == dataset.loc[i+1,'hospitalises']: - dataset.loc[i+1,'hospitalises'] = 'inv' - elif dataset.loc[i,'hospitalises'] > dataset.loc[i+1,'hospitalises']: - dataset.loc[i+1,'hospitalises'] = 'inv' - elif dataset.loc[i,'hospitalises'] < dataset.loc[i+1,'hospitalises']: - dataset.loc[i,'hospitalises'] = dataset.loc[i+1,'hospitalises'] - dataset.loc[i+1,'hospitalises'] = 'inv' - - if dataset.loc[i,'maille_code'] == dataset.loc[i+1,'maille_code'] and dataset.loc[i,'date'] == dataset.loc[i+1,'date'] and dataset.loc[i,'gueris'] != 'inv': - if pd.isnull(dataset.loc[i,'gueris']) == True and pd.isnull(dataset.loc[i+1,'gueris']) == False: - dataset.loc[i,'gueris'] = dataset.loc[i+1,'gueris'] - dataset.loc[i+1,'gueris'] = 'inv' - elif pd.isnull(dataset.loc[i,'gueris']) == False and pd.isnull(dataset.loc[i+1,'gueris']) == True: - dataset.loc[i+1,'gueris'] = 'inv' - elif pd.isnull(dataset.loc[i,'gueris']) == True and pd.isnull(dataset.loc[i+1,'gueris']) == True: - dataset.loc[i+1,'gueris'] = 'inv' - elif dataset.loc[i,'gueris'] == dataset.loc[i+1,'gueris']: - dataset.loc[i+1,'gueris'] = 'inv' - elif dataset.loc[i,'gueris'] > dataset.loc[i+1,'gueris']: - dataset.loc[i+1,'gueris'] = 'inv' - elif dataset.loc[i,'gueris'] < dataset.loc[i+1,'gueris']: - dataset.loc[i,'gueris'] = dataset.loc[i+1,'gueris'] - dataset.loc[i+1,'gueris'] = 'inv' - - #Delete the redundant resulting rows and reset the indices: + # Delete the redundant resulting rows 
and reset the indices: dataset = dataset[dataset.cas_confirmes != 'inv'] dataset = dataset.reset_index() dataset = dataset.drop(['index'], axis=1) - #Rename/Translate the column titles: - dataset = dataset.rename(columns={"maille_code":"subregion_code", "maille_nom":"subregion_name", "cas_confirmes": "confirmed", "deces": "deaths", "reanimation": "recovering", "hospitalises": "hospitalized", "gueris": "recovered", "source_nom": "source_name"}) + # Rename/Translate the column titles: + dataset = dataset.rename( + columns={"maille_code": "subregion_code", + "maille_nom": "subregion_name", "cas_confirmes": "confirmed", + "deces": "deaths", "reanimation": "recovering", + "hospitalises": "hospitalized", "gueris": "recovered", + "source_nom": "source_name"}) dataset['country'] = 'France' - frcovidata = dataset[['subregion_code', 'subregion_name', 'country', 'date', 'confirmed', 'hospitalized', 'recovering', 'recovered', 'deaths', 'source_name', 'source_url', 'source_archive', 'source_type']] + frcovidata = dataset[ + ['subregion_code', 'subregion_name', 'country', 'date', 'confirmed', + 'hospitalized', 'recovering', 'recovered', + 'deaths', 'source_name', 'source_url', 'source_archive', + 'source_type']] - return frcovidata \ No newline at end of file + return frcovidata From 64af1ab5929c66982a33b2e3adc84f7ad445a3b4 Mon Sep 17 00:00:00 2001 From: oussamanaji Date: Mon, 6 Apr 2020 19:17:49 +0300 Subject: [PATCH 05/12] Update fr_covidata.py --- .../covid/fr_covidata/fr_covidata.py | 52 +++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/task_geo/data_sources/covid/fr_covidata/fr_covidata.py b/task_geo/data_sources/covid/fr_covidata/fr_covidata.py index aaa0c0b..6e71a00 100644 --- a/task_geo/data_sources/covid/fr_covidata/fr_covidata.py +++ b/task_geo/data_sources/covid/fr_covidata/fr_covidata.py @@ -20,7 +20,7 @@ url = ( 'https://raw.githubusercontent.com/opencovid19-fr/' 'data/master/dist/chiffres-cles.csv' - ) +) def fr_covidata(): @@ -101,37 +101,37 @@ def fr_covidata_formatter(dataset): # Combine Source names, url, archive and type for repetitive # subregions at the same date: dataset.loc[i, 'source_nom'] = dataset.loc[i, 'source_nom'] + \ - dataset.loc[i + 1, 'source_nom'] + dataset.loc[i + 1, 'source_nom'] dataset.loc[i, 'source_url'] = dataset.loc[i, 'source_url'] + \ dataset.loc[i + 1, 'source_url'] dataset.loc[i, 'source_archive'] = dataset.loc[ - i, 'source_archive'] + \ + i, 'source_archive'] + \ dataset.loc[ - i + 1, 'source_archive'] + i + 1, 'source_archive'] dataset.loc[i, 'source_type'] = dataset.loc[i, 'source_type'] + \ dataset.loc[i + 1, 'source_type'] if pd.isnull( - dataset.loc[i, 'cas_confirmes']) is True and pd.isnull( + dataset.loc[i, 'cas_confirmes']) is True and pd.isnull( dataset.loc[i + 1, 'cas_confirmes']) is False: - dataset.loc[i, 'cas_confirmes'] = dataset.loc[ - i + 1, 'cas_confirmes'] - dataset.loc[i + 1, 'cas_confirmes'] = 'inv' + dataset.loc[i, 'cas_confirmes'] = dataset.loc[ + i + 1, 'cas_confirmes'] + dataset.loc[i + 1, 'cas_confirmes'] = 'inv' elif pd.isnull( - dataset.loc[i, 'cas_confirmes']) is False and pd.isnull( + dataset.loc[i, 'cas_confirmes']) is False and pd.isnull( dataset.loc[i + 1, 'cas_confirmes']) is True: - dataset.loc[i + 1, 'cas_confirmes'] = 'inv' + dataset.loc[i + 1, 'cas_confirmes'] = 'inv' elif pd.isnull( - dataset.loc[i, 'cas_confirmes']) is True and pd.isnull( + dataset.loc[i, 'cas_confirmes']) is True and pd.isnull( dataset.loc[i + 1, 'cas_confirmes']) is True: - dataset.loc[i + 1, 'cas_confirmes'] = 
'inv' + dataset.loc[i + 1, 'cas_confirmes'] = 'inv' elif dataset.loc[i, 'cas_confirmes'] == dataset.loc[ - i + 1, 'cas_confirmes']: + i + 1, 'cas_confirmes']: dataset.loc[i + 1, 'cas_confirmes'] = 'inv' elif dataset.loc[i, 'cas_confirmes'] > dataset.loc[ - i + 1, 'cas_confirmes']: + i + 1, 'cas_confirmes']: dataset.loc[i + 1, 'cas_confirmes'] = 'inv' elif dataset.loc[i, 'cas_confirmes'] < dataset.loc[ - i + 1, 'cas_confirmes']: + i + 1, 'cas_confirmes']: dataset.loc[i, 'cas_confirmes'] = dataset.loc[ i + 1, 'cas_confirmes'] dataset.loc[i + 1, 'cas_confirmes'] = 'inv' @@ -174,13 +174,13 @@ def fr_covidata_formatter(dataset): dataset.loc[i + 1, 'reanimation']) is True: dataset.loc[i + 1, 'reanimation'] = 'inv' elif dataset.loc[i, 'reanimation'] == dataset.loc[ - i + 1, 'reanimation']: + i + 1, 'reanimation']: dataset.loc[i + 1, 'reanimation'] = 'inv' elif dataset.loc[i, 'reanimation'] > dataset.loc[ - i + 1, 'reanimation']: + i + 1, 'reanimation']: dataset.loc[i + 1, 'reanimation'] = 'inv' elif dataset.loc[i, 'reanimation'] < dataset.loc[ - i + 1, 'reanimation']: + i + 1, 'reanimation']: dataset.loc[i, 'reanimation'] = dataset.loc[ i + 1, 'reanimation'] dataset.loc[i + 1, 'reanimation'] = 'inv' @@ -191,25 +191,25 @@ def fr_covidata_formatter(dataset): if pd.isnull( dataset.loc[i, 'hospitalises']) is True and pd.isnull( dataset.loc[i + 1, 'hospitalises']) is False: - dataset.loc[i, 'hospitalises'] = dataset.loc[ - i + 1, 'hospitalises'] - dataset.loc[i + 1, 'hospitalises'] = 'inv' + dataset.loc[i, 'hospitalises'] = dataset.loc[ + i + 1, 'hospitalises'] + dataset.loc[i + 1, 'hospitalises'] = 'inv' elif pd.isnull( dataset.loc[i, 'hospitalises']) is False and pd.isnull( dataset.loc[i + 1, 'hospitalises']) is True: dataset.loc[i + 1, 'hospitalises'] = 'inv' elif pd.isnull( - dataset.loc[i, 'hospitalises']) is True and pd.isnull( + dataset.loc[i, 'hospitalises']) is True and pd.isnull( dataset.loc[i + 1, 'hospitalises']) is True: - dataset.loc[i + 1, 'hospitalises'] = 'inv' + dataset.loc[i + 1, 'hospitalises'] = 'inv' elif dataset.loc[i, 'hospitalises'] == dataset.loc[ - i + 1, 'hospitalises']: + i + 1, 'hospitalises']: dataset.loc[i + 1, 'hospitalises'] = 'inv' elif dataset.loc[i, 'hospitalises'] > dataset.loc[ - i + 1, 'hospitalises']: + i + 1, 'hospitalises']: dataset.loc[i + 1, 'hospitalises'] = 'inv' elif dataset.loc[i, 'hospitalises'] < dataset.loc[ - i + 1, 'hospitalises']: + i + 1, 'hospitalises']: dataset.loc[i, 'hospitalises'] = dataset.loc[ i + 1, 'hospitalises'] dataset.loc[i + 1, 'hospitalises'] = 'inv' From 2120e7927523125d04aba6bceb71d5efce41dc3c Mon Sep 17 00:00:00 2001 From: oussamanaji Date: Fri, 10 Apr 2020 15:45:36 +0300 Subject: [PATCH 06/12] Create datapackage.json --- .../covid/fr_covidata/datapackage.json | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 task_geo/data_sources/covid/fr_covidata/datapackage.json diff --git a/task_geo/data_sources/covid/fr_covidata/datapackage.json b/task_geo/data_sources/covid/fr_covidata/datapackage.json new file mode 100644 index 0000000..cc68e5b --- /dev/null +++ b/task_geo/data_sources/covid/fr_covidata/datapackage.json @@ -0,0 +1,77 @@ +{ + "title": "COVID-19 Granular Data - France", + "description": "Sourcing of COVID-19 cases granular data in different subregions of France, aggregated by @opencovid19-fr, https://github.com/opencovid19-fr/", + "licenses": [{"name": "copyright-authors"}], + "fields": [ + { + "name": "subregion_code", + "description": "Geographical location - maille code", + "type": "str" + }, + 
{ + "name": "subregion_name", + "description": "Geographical location - department", + "type": "str" + }, + { + "name": "country", + "description": "Geographical location - country", + "type": "str" + }, + { + "name": "date", + "description": "Date", + "type": "datetime.date" + }, + { + "name": "confirmed", + "description": "Cumulative number of confirmed covid-19 cases at the given location until the given time", + "type": "float" + }, + { + "name": "hospitalized", + "description": "Number of hospitalized people due to covid-19 at the given location at the given time", + "type": "float" + }, + { + "name": "recovering", + "description": "Number of people recovering from covid-19 at the given location at the given time", + "type": "float" + }, + { + "name": "recovered", + "description": "Cumulative number of people healed from covid-19 at the given location until the given time", + "type": "float" + }, + { + "name": "deaths", + "description": "Cumulative number of people who passed away from covid-19 at the given location until the given time", + "type": "float" + }, + { + "name": "source_name", + "description": "Name of the source of information relevant to the given location at the given time", + "type": "str" + }, + { + "name": "source_url", + "description": "Uniform Resource Locator link of the source of information relevant to the given location at the given time", + "type": "str" + }, + { + "name": "source_archive", + "description": "Uniform Resource Locator link of the archive of the source of information relevant to the given location at the given time", + "type": "str" + }, + { + "name": "source_type", + "description": "Type of the source of information: National Health Ministry/Regional health agencies", + "type": "str" + } + ], + "keywords": [ + "France", + "COVID-19", + "granular data" + ] +} From 1f840b3daf7d4e65a4239d859c31fc60b82516c4 Mon Sep 17 00:00:00 2001 From: oussamanaji Date: Fri, 10 Apr 2020 16:48:58 +0300 Subject: [PATCH 07/12] Update fr_covidata.py --- .../covid/fr_covidata/fr_covidata.py | 40 ++++++++----------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/task_geo/data_sources/covid/fr_covidata/fr_covidata.py b/task_geo/data_sources/covid/fr_covidata/fr_covidata.py index 6e71a00..4ecbeb0 100644 --- a/task_geo/data_sources/covid/fr_covidata/fr_covidata.py +++ b/task_geo/data_sources/covid/fr_covidata/fr_covidata.py @@ -67,31 +67,26 @@ def fr_covidata_formatter(dataset): frcovidata(pandas.DataFrame) """ - dataset = dataset[dataset.granularite != 'region'] - dataset = dataset[dataset.granularite != 'monde'] - dataset = dataset[dataset.granularite != 'pays'] - dataset = dataset[dataset.granularite != 'collectivite-outremer'] - dataset = dataset[dataset.maille_code != 'DEP-971'] - dataset = dataset[dataset.maille_code != 'DEP-972'] - dataset = dataset[dataset.maille_code != 'DEP-973'] - dataset = dataset[dataset.maille_code != 'DEP-974'] - dataset = dataset[dataset.maille_code != 'DEP-976'] - dataset = dataset.drop(['depistes'], axis=1) - dataset = dataset.drop(['granularite'], axis=1) + no_granularites = ['region', 'monde', 'pays', 'collectivite-outremer'] + no_maille_codes = ['DEP-971', 'DEP-972', 'DEP-973', 'DEP-974', 'DEP-976'] + dataset = dataset[ + (~dataset.granularite.isin(no_granularites)) & + (~dataset.maille_code.isin(no_maille_codes)) + ] + dataset = dataset.drop(['depistes', 'granularite'], axis=1) dataset = dataset.drop_duplicates( subset=['date', 'maille_code', 'cas_confirmes', 'deces', 'reanimation', 'hospitalises', 'gueris'], 
keep=False) + dataset['date'] = pd.to_datetime(dataset['date'].astype(str)).dt.date # Reset indices: - dataset = dataset.reset_index() - dataset = dataset.drop(['index'], axis=1) + dataset = dataset.reset_index(drop=True) # Turn source columns' values type to string: - dataset['source_nom'] = dataset['source_nom'].astype(str) - dataset['source_url'] = dataset['source_url'].astype(str) - dataset['source_archive'] = dataset['source_archive'].astype(str) - dataset['source_type'] = dataset['source_type'].astype(str) + str_columns = ['source_nom', 'source_url', + 'source_archive', 'source_type'] + dataset[str_columns] = dataset[str_columns].astype(str) for i in range(len(dataset) - 1): if dataset.loc[i, 'maille_code'] == dataset.loc[ @@ -101,15 +96,15 @@ def fr_covidata_formatter(dataset): # Combine Source names, url, archive and type for repetitive # subregions at the same date: dataset.loc[i, 'source_nom'] = dataset.loc[i, 'source_nom'] + \ - dataset.loc[i + 1, 'source_nom'] + " " + dataset.loc[i + 1, 'source_nom'] dataset.loc[i, 'source_url'] = dataset.loc[i, 'source_url'] + \ - dataset.loc[i + 1, 'source_url'] + " " + dataset.loc[i + 1, 'source_url'] dataset.loc[i, 'source_archive'] = dataset.loc[ - i, 'source_archive'] + \ + i, 'source_archive'] + " " + \ dataset.loc[ i + 1, 'source_archive'] dataset.loc[i, 'source_type'] = dataset.loc[i, 'source_type'] + \ - dataset.loc[i + 1, 'source_type'] + " " + dataset.loc[i + 1, 'source_type'] if pd.isnull( dataset.loc[i, 'cas_confirmes']) is True and pd.isnull( dataset.loc[i + 1, 'cas_confirmes']) is False: @@ -237,8 +232,7 @@ def fr_covidata_formatter(dataset): # Delete the redundant resulting rows and reset the indices: dataset = dataset[dataset.cas_confirmes != 'inv'] - dataset = dataset.reset_index() - dataset = dataset.drop(['index'], axis=1) + dataset = dataset.reset_index(drop=True) # Rename/Translate the column titles: dataset = dataset.rename( From 567332695bcdab799f94e958bb7e60b945a29472 Mon Sep 17 00:00:00 2001 From: oussamanaji Date: Fri, 10 Apr 2020 17:14:28 +0300 Subject: [PATCH 08/12] Update fr_covidata.py --- task_geo/data_sources/covid/fr_covidata/fr_covidata.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/task_geo/data_sources/covid/fr_covidata/fr_covidata.py b/task_geo/data_sources/covid/fr_covidata/fr_covidata.py index 4ecbeb0..2b21c7d 100644 --- a/task_geo/data_sources/covid/fr_covidata/fr_covidata.py +++ b/task_geo/data_sources/covid/fr_covidata/fr_covidata.py @@ -67,11 +67,10 @@ def fr_covidata_formatter(dataset): frcovidata(pandas.DataFrame) """ - no_granularites = ['region', 'monde', 'pays', 'collectivite-outremer'] - no_maille_codes = ['DEP-971', 'DEP-972', 'DEP-973', 'DEP-974', 'DEP-976'] + no_gr = ['region', 'monde', 'pays', 'collectivite-outremer'] + no_mc = ['DEP-971', 'DEP-972', 'DEP-973', 'DEP-974', 'DEP-976'] dataset = dataset[ - (~dataset.granularite.isin(no_granularites)) & - (~dataset.maille_code.isin(no_maille_codes)) + (~dataset.granularite.isin(no_gr)) & (~dataset.maille_code.isin(no_mc)) ] dataset = dataset.drop(['depistes', 'granularite'], axis=1) dataset = dataset.drop_duplicates( From bf409589ddcf9c329cd9c86538d9eedd031ac56c Mon Sep 17 00:00:00 2001 From: oussamanaji Date: Mon, 13 Apr 2020 19:39:55 +0300 Subject: [PATCH 09/12] Create audit.md --- .../data_sources/covid/fr_covidata/audit.md | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 task_geo/data_sources/covid/fr_covidata/audit.md diff --git a/task_geo/data_sources/covid/fr_covidata/audit.md 
b/task_geo/data_sources/covid/fr_covidata/audit.md
new file mode 100644
index 0000000..1b9d996
--- /dev/null
+++ b/task_geo/data_sources/covid/fr_covidata/audit.md
@@ -0,0 +1,71 @@
+# COVID-19 Granular Data - France
+
+## General information
+
+- **Description**: COVID-19 daily confirmed cases/hospitalized/recovering/recovered/deaths data in different subregions of France
+- **Credits**: @opencovid19-fr
+- **Source**: https://github.com/opencovid19-fr/data/
+
+## Column Details
+
+**subregion_code**
+- Description: French Maille department code
+- Type: str
+
+**subregion_name**
+- Description: French department name
+- Type: str
+
+**country**
+- Description: Country Name - France
+- Type: str
+
+**date**
+- Description: Date (yyyy-mm-dd)
+- Type: datetime.date
+
+**confirmed**
+- Description: Cumulative number of confirmed covid-19 cases at the given location until the given time
+- Type: float
+
+**hospitalized**
+- Description: Number of hospitalized people due to covid-19 at the given location at the given time
+- Type: float
+
+**recovering**
+- Description: Number of people recovering from covid-19 at the given location at the given time
+- Type: float
+
+**recovered**
+- Description: Cumulative number of people healed from covid-19 at the given location until the given time
+- Type: float
+
+**deaths**
+- Description: Cumulative number of people who passed away from covid-19 at the given location until the given time
+- Type: float
+
+**source_name**
+- Description: Name of the source of information relevant to the given location at the given time
+- Type: str
+
+**source_url**
+- Description: Uniform Resource Locator link of the source of information relevant to the given location at the given time
+- Type: str
+
+**source_archive**
+- Description: Uniform Resource Locator link of the archive of the source of information relevant to the given location at the given time
+- Type: str
+
+**source_type**
+- Description: Type of the source of information: National Health Ministry/Regional health agencies
+- Type: str
+
+## Transformations applied
+
+- Deleting the rows not related to departments inside Metropolitan France ('region', 'monde', 'pays', 'collectivite-outremer', 'DEP-971', 'DEP-972', 'DEP-973', 'DEP-974', 'DEP-976')
+- Dropping the columns 'depistes' and 'granularite'
+- Deleting the duplicate rows containing the same numerical data
+- Switching the 'date' column format from str to datetime.date
+- Switching the 'source_nom', 'source_url', 'source_archive', 'source_type' columns format to str
+- Merging all the rows that have the same 'date' and 'subregion_name' column value, by taking the maximum of every numerical column value and aggregating the source columns' info
+- Renaming/Translating the column titles from French to English, and rearranging them

From 673b25039d401d24d5b43ff6d4925a6e7930135f Mon Sep 17 00:00:00 2001
From: oussamanaji
Date: Wed, 15 Apr 2020 02:26:04 +0300
Subject: [PATCH 10/12] Update fr_covidata.py

---
 .../covid/fr_covidata/fr_covidata.py          | 167 +++---------------
 1 file changed, 20 insertions(+), 147 deletions(-)

diff --git a/task_geo/data_sources/covid/fr_covidata/fr_covidata.py b/task_geo/data_sources/covid/fr_covidata/fr_covidata.py
index 2b21c7d..41644c7 100644
--- a/task_geo/data_sources/covid/fr_covidata/fr_covidata.py
+++ b/task_geo/data_sources/covid/fr_covidata/fr_covidata.py
@@ -87,151 +87,24 @@ def fr_covidata_formatter(dataset):
                    'source_archive', 'source_type']
     dataset[str_columns] = dataset[str_columns].astype(str)
 
-    for i in 
range(len(dataset) - 1): - if dataset.loc[i, 'maille_code'] == dataset.loc[ - i + 1, 'maille_code'] and dataset.loc[i, 'date'] == dataset.loc[ - i + 1, 'date'] and dataset.loc[ - i, 'cas_confirmes'] != 'inv' and i != len(dataset): - # Combine Source names, url, archive and type for repetitive - # subregions at the same date: - dataset.loc[i, 'source_nom'] = dataset.loc[i, 'source_nom'] + \ - " " + dataset.loc[i + 1, 'source_nom'] - dataset.loc[i, 'source_url'] = dataset.loc[i, 'source_url'] + \ - " " + dataset.loc[i + 1, 'source_url'] - dataset.loc[i, 'source_archive'] = dataset.loc[ - i, 'source_archive'] + " " + \ - dataset.loc[ - i + 1, 'source_archive'] - dataset.loc[i, 'source_type'] = dataset.loc[i, 'source_type'] + \ - " " + dataset.loc[i + 1, 'source_type'] - if pd.isnull( - dataset.loc[i, 'cas_confirmes']) is True and pd.isnull( - dataset.loc[i + 1, 'cas_confirmes']) is False: - dataset.loc[i, 'cas_confirmes'] = dataset.loc[ - i + 1, 'cas_confirmes'] - dataset.loc[i + 1, 'cas_confirmes'] = 'inv' - elif pd.isnull( - dataset.loc[i, 'cas_confirmes']) is False and pd.isnull( - dataset.loc[i + 1, 'cas_confirmes']) is True: - dataset.loc[i + 1, 'cas_confirmes'] = 'inv' - elif pd.isnull( - dataset.loc[i, 'cas_confirmes']) is True and pd.isnull( - dataset.loc[i + 1, 'cas_confirmes']) is True: - dataset.loc[i + 1, 'cas_confirmes'] = 'inv' - elif dataset.loc[i, 'cas_confirmes'] == dataset.loc[ - i + 1, 'cas_confirmes']: - dataset.loc[i + 1, 'cas_confirmes'] = 'inv' - elif dataset.loc[i, 'cas_confirmes'] > dataset.loc[ - i + 1, 'cas_confirmes']: - dataset.loc[i + 1, 'cas_confirmes'] = 'inv' - elif dataset.loc[i, 'cas_confirmes'] < dataset.loc[ - i + 1, 'cas_confirmes']: - dataset.loc[i, 'cas_confirmes'] = dataset.loc[ - i + 1, 'cas_confirmes'] - dataset.loc[i + 1, 'cas_confirmes'] = 'inv' - - if dataset.loc[i, 'maille_code'] == dataset.loc[ - i + 1, 'maille_code'] and dataset.loc[i, 'date'] == dataset.loc[ - i + 1, 'date'] and dataset.loc[i, 'deces'] != 'inv': - if pd.isnull(dataset.loc[i, 'deces']) is True and pd.isnull( - dataset.loc[i + 1, 'deces']) is False: - dataset.loc[i, 'deces'] = dataset.loc[i + 1, 'deces'] - dataset.loc[i + 1, 'deces'] = 'inv' - elif pd.isnull(dataset.loc[i, 'deces']) is False and pd.isnull( - dataset.loc[i + 1, 'deces']) is True: - dataset.loc[i + 1, 'deces'] = 'inv' - elif pd.isnull(dataset.loc[i, 'deces']) is True and pd.isnull( - dataset.loc[i + 1, 'deces']) is True: - dataset.loc[i + 1, 'deces'] = 'inv' - elif dataset.loc[i, 'deces'] == dataset.loc[i + 1, 'deces']: - dataset.loc[i + 1, 'deces'] = 'inv' - elif dataset.loc[i, 'deces'] > dataset.loc[i + 1, 'deces']: - dataset.loc[i + 1, 'deces'] = 'inv' - elif dataset.loc[i, 'deces'] < dataset.loc[i + 1, 'deces']: - dataset.loc[i, 'deces'] = dataset.loc[i + 1, 'deces'] - dataset.loc[i + 1, 'deces'] = 'inv' - - if dataset.loc[i, 'maille_code'] == dataset.loc[ - i + 1, 'maille_code'] and dataset.loc[i, 'date'] == dataset.loc[ - i + 1, 'date'] and dataset.loc[i, 'reanimation'] != 'inv': - if pd.isnull(dataset.loc[i, 'reanimation']) is True and pd.isnull( - dataset.loc[i + 1, 'reanimation']) is False: - dataset.loc[i, 'reanimation'] = dataset.loc[ - i + 1, 'reanimation'] - dataset.loc[i + 1, 'reanimation'] = 'inv' - elif pd.isnull( - dataset.loc[i, 'reanimation']) is False and pd.isnull( - dataset.loc[i + 1, 'reanimation']) is True: - dataset.loc[i + 1, 'reanimation'] = 'inv' - elif pd.isnull( - dataset.loc[i, 'reanimation']) is True and pd.isnull( - dataset.loc[i + 1, 'reanimation']) is True: - dataset.loc[i + 1, 
'reanimation'] = 'inv' - elif dataset.loc[i, 'reanimation'] == dataset.loc[ - i + 1, 'reanimation']: - dataset.loc[i + 1, 'reanimation'] = 'inv' - elif dataset.loc[i, 'reanimation'] > dataset.loc[ - i + 1, 'reanimation']: - dataset.loc[i + 1, 'reanimation'] = 'inv' - elif dataset.loc[i, 'reanimation'] < dataset.loc[ - i + 1, 'reanimation']: - dataset.loc[i, 'reanimation'] = dataset.loc[ - i + 1, 'reanimation'] - dataset.loc[i + 1, 'reanimation'] = 'inv' - - if dataset.loc[i, 'maille_code'] == dataset.loc[ - i + 1, 'maille_code'] and dataset.loc[i, 'date'] == dataset.loc[ - i + 1, 'date'] and dataset.loc[i, 'hospitalises'] != 'inv': - if pd.isnull( - dataset.loc[i, 'hospitalises']) is True and pd.isnull( - dataset.loc[i + 1, 'hospitalises']) is False: - dataset.loc[i, 'hospitalises'] = dataset.loc[ - i + 1, 'hospitalises'] - dataset.loc[i + 1, 'hospitalises'] = 'inv' - elif pd.isnull( - dataset.loc[i, 'hospitalises']) is False and pd.isnull( - dataset.loc[i + 1, 'hospitalises']) is True: - dataset.loc[i + 1, 'hospitalises'] = 'inv' - elif pd.isnull( - dataset.loc[i, 'hospitalises']) is True and pd.isnull( - dataset.loc[i + 1, 'hospitalises']) is True: - dataset.loc[i + 1, 'hospitalises'] = 'inv' - elif dataset.loc[i, 'hospitalises'] == dataset.loc[ - i + 1, 'hospitalises']: - dataset.loc[i + 1, 'hospitalises'] = 'inv' - elif dataset.loc[i, 'hospitalises'] > dataset.loc[ - i + 1, 'hospitalises']: - dataset.loc[i + 1, 'hospitalises'] = 'inv' - elif dataset.loc[i, 'hospitalises'] < dataset.loc[ - i + 1, 'hospitalises']: - dataset.loc[i, 'hospitalises'] = dataset.loc[ - i + 1, 'hospitalises'] - dataset.loc[i + 1, 'hospitalises'] = 'inv' - - if dataset.loc[i, 'maille_code'] == dataset.loc[ - i + 1, 'maille_code'] and dataset.loc[i, 'date'] == dataset.loc[ - i + 1, 'date'] and dataset.loc[i, 'gueris'] != 'inv': - if pd.isnull(dataset.loc[i, 'gueris']) is True and pd.isnull( - dataset.loc[i + 1, 'gueris']) is False: - dataset.loc[i, 'gueris'] = dataset.loc[i + 1, 'gueris'] - dataset.loc[i + 1, 'gueris'] = 'inv' - elif pd.isnull(dataset.loc[i, 'gueris']) is False and pd.isnull( - dataset.loc[i + 1, 'gueris']) is True: - dataset.loc[i + 1, 'gueris'] = 'inv' - elif pd.isnull(dataset.loc[i, 'gueris']) is True and pd.isnull( - dataset.loc[i + 1, 'gueris']) is True: - dataset.loc[i + 1, 'gueris'] = 'inv' - elif dataset.loc[i, 'gueris'] == dataset.loc[i + 1, 'gueris']: - dataset.loc[i + 1, 'gueris'] = 'inv' - elif dataset.loc[i, 'gueris'] > dataset.loc[i + 1, 'gueris']: - dataset.loc[i + 1, 'gueris'] = 'inv' - elif dataset.loc[i, 'gueris'] < dataset.loc[i + 1, 'gueris']: - dataset.loc[i, 'gueris'] = dataset.loc[i + 1, 'gueris'] - dataset.loc[i + 1, 'gueris'] = 'inv' - - # Delete the redundant resulting rows and reset the indices: - dataset = dataset[dataset.cas_confirmes != 'inv'] - dataset = dataset.reset_index(drop=True) + aggre = { + 'cas_confirmes': np.max, + 'cas_ehpad': np.max, + 'cas_confirmes_ehpad': np.max, + 'cas_possibles_ehpad': np.max, + 'deces': np.max, + 'deces_ehpad': np.max, + 'reanimation': np.max, + 'hospitalises': np.max, + 'gueris': np.max, + 'source_nom': ','.join, + 'source_url': ','.join, + 'source_archive': ','.join, + 'source_type': ','.join + } + dataset = dataset.groupby(['date', + 'maille_code', + 'maille_nom']).aggregate(aggre).reset_index() # Rename/Translate the column titles: dataset = dataset.rename( @@ -242,9 +115,9 @@ def fr_covidata_formatter(dataset): "source_nom": "source_name"}) dataset['country'] = 'France' frcovidata = dataset[ - ['subregion_code', 
'subregion_name', 'country', 'date', 'confirmed',
-         'hospitalized', 'recovering', 'recovered',
-         'deaths', 'source_name', 'source_url', 'source_archive',
-         'source_type']]
+        'subregion_code', 'subregion_name', 'country', 'date', 'confirmed',
+        'hospitalized', 'recovering', 'recovered',
+        'deaths', 'source_name', 'source_url', 'source_archive',
+        'source_type']
 
     return frcovidata

From d902d02b38f2787289406350c42c7104e4be961d Mon Sep 17 00:00:00 2001
From: oussamanaji
Date: Wed, 15 Apr 2020 02:33:08 +0300
Subject: [PATCH 11/12] Update fr_covidata.py

---
 task_geo/data_sources/covid/fr_covidata/fr_covidata.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/task_geo/data_sources/covid/fr_covidata/fr_covidata.py b/task_geo/data_sources/covid/fr_covidata/fr_covidata.py
index 41644c7..c872cab 100644
--- a/task_geo/data_sources/covid/fr_covidata/fr_covidata.py
+++ b/task_geo/data_sources/covid/fr_covidata/fr_covidata.py
@@ -15,6 +15,7 @@
 import io
 
 import pandas as pd
+import numpy as np
 import requests
 
 url = (
@@ -115,9 +116,9 @@ def fr_covidata_formatter(dataset):
         "source_nom": "source_name"})
     dataset['country'] = 'France'
     frcovidata = dataset[
-        'subregion_code', 'subregion_name', 'country', 'date', 'confirmed',
-        'hospitalized', 'recovering', 'recovered',
-        'deaths', 'source_name', 'source_url', 'source_archive',
-        'source_type']
+        ['subregion_code', 'subregion_name', 'country', 'date', 'confirmed',
+         'hospitalized', 'recovering', 'recovered',
+         'deaths', 'source_name', 'source_url', 'source_archive',
+         'source_type']]
 
     return frcovidata

From 848de12f598494aecd792a98fa3186fa886cdebb Mon Sep 17 00:00:00 2001
From: oussamanaji
Date: Wed, 15 Apr 2020 03:06:29 +0300
Subject: [PATCH 12/12] Update fr_covidata.py

---
 task_geo/data_sources/covid/fr_covidata/fr_covidata.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/task_geo/data_sources/covid/fr_covidata/fr_covidata.py b/task_geo/data_sources/covid/fr_covidata/fr_covidata.py
index c872cab..b2656b4 100644
--- a/task_geo/data_sources/covid/fr_covidata/fr_covidata.py
+++ b/task_geo/data_sources/covid/fr_covidata/fr_covidata.py
@@ -14,8 +14,8 @@
 
 import io
 
-import pandas as pd
 import numpy as np
+import pandas as pd
 import requests
 
 url = (
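
A minimal usage sketch for the finished data source (assuming the repository
root is on PYTHONPATH; the output filename is illustrative, and the expected
column list mirrors datapackage.json):

    # Fetch and format the OpenCOVID19-fr dataset.
    from task_geo.data_sources.covid.fr_covidata import fr_covidata

    data = fr_covidata()

    # The formatter is expected to return the translated columns in a fixed
    # order, with one row per (subregion_code, date) pair after the groupby
    # merge.
    expected = ['subregion_code', 'subregion_name', 'country', 'date',
                'confirmed', 'hospitalized', 'recovering', 'recovered',
                'deaths', 'source_name', 'source_url', 'source_archive',
                'source_type']
    assert list(data.columns) == expected
    assert (data['country'] == 'France').all()

    data.to_csv('fr_covidata.csv', index=False)

The same export is available from the command line through the module entry
point, e.g. python -m task_geo.data_sources.covid.fr_covidata -o fr_covidata.csv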