diff --git a/.gitignore b/.gitignore
index 7bbc71c..9868b4e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -99,3 +99,10 @@ ENV/
 # mypy
 .mypy_cache/
+
+# data
+/data*
+
+# credentials
+api_info.py
+db_info.py
diff --git a/Get_htmls.py b/Get_htmls.py
new file mode 100644
index 0000000..3527de1
--- /dev/null
+++ b/Get_htmls.py
@@ -0,0 +1,99 @@
+
+# coding: utf-8
+
+# In[24]:
+
+
+from requests import get
+from requests.exceptions import RequestException
+from contextlib import closing
+from bs4 import BeautifulSoup
+from datetime import datetime
+from datetime import timedelta
+import pandas as pd
+import urllib3
+import pickle
+
+
+# In[5]:
+
+
+def simple_get(url):
+    """
+    Attempts to get the content at `url` by making an HTTP GET request.
+    If the content-type of the response is some kind of HTML/XML, return the
+    text content, otherwise return None.
+    """
+    try:
+        with closing(get(url, stream=True)) as resp:
+            if is_good_response(resp):
+                return resp.content
+            else:
+                return None
+
+    except RequestException as e:
+        log_error('Error during requests to {0} : {1}'.format(url, str(e)))
+        return None
+
+
+def is_good_response(resp):
+    """
+    Returns true if the response seems to be HTML, false otherwise.
+    """
+    content_type = resp.headers['Content-Type'].lower()
+    return (resp.status_code == 200
+            and content_type is not None
+            and content_type.find('html') > -1)
+
+
+def log_error(e):
+    """
+    It is always a good idea to log errors.
+    This function just prints them, but you can
+    make it do anything.
+    """
+    print(e)
+
+
+# In[6]:
+
+
+cities=['Berlin', 'Hamburg', 'Munich', 'Cologne', 'Frankfurt_am_Main']
+
+base_url=['https://www.wetter.de/deutschland/wetter-berlin-18228265/','https://www.wetter.de/deutschland/wetter-hamburg-18219464/','https://www.wetter.de/deutschland/wetter-muenchen-18225562/','https://www.wetter.de/deutschland/wetter-koeln-18220679/','https://www.wetter.de/deutschland/wetter-frankfurt-18221009/']
+
+
+# In[18]:
+
+
+def collect_htmls(city_base_url):
+    raw_html=[]
+    days_to_predict = 15
+    http = urllib3.PoolManager()
+    url_hourly_base = city_base_url
+    tag_tags = ['tag-'+str(tag) for tag in range(9,days_to_predict+1)]
+    hourly_website_tags = ['wetterbericht-aktuell', 'wetterbericht-morgen', 'wetterbericht-uebermorgen','wetter-bericht','wettervorhersage','wetter-vorhersage','wettervorschau','wetter-vorschau']
+    hourly_website_tags.extend(tag_tags)
+    for i, tag in enumerate(hourly_website_tags):
+        url = url_hourly_base+tag+'.html'
+        raw_html.append(simple_get(url))
+
+    return raw_html
+
+
+# In[26]:
+
+
+for i,city in enumerate(cities):
+    html_dict = {}
+    current_time = pd.Timestamp(datetime.now())
+
+    html_dict['website'] = 'www.wetter.de'
+    html_dict['city'] = city
+    html_dict['date_of_acquisition'] = current_time
+    html_dict['htmls'] = collect_htmls(base_url[i])
+    pkl_name='./wetter_de/wetter_de_'+city+'_'+str(current_time)[:10]+'.pkl'
+    f = open(pkl_name,"wb")
+    pickle.dump(html_dict,f)
+    f.close()
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..464453f
--- /dev/null
+++ b/README.md
@@ -0,0 +1,15 @@
+# webscraping_2018
+This repository contains the scripts that gather weather information from the websites bild.de and wetter.de by web scraping, and from The Weather Channel via RESTful API calls.
+The scripts that __gather the data__ run on a server as cron jobs. Their schedule is described in `crontab_info.txt`.
+
+The RESTful API scripts are organized as follows:
+ - `api_info.py` has the necessary information to access the Wunderground API.
+ + - `constants.py` has the global constants used across API scripts. + + - `city_location.py` is the script that gets the coordinates of specified named cities. + + - `daily_structured.py` is the script that __gathers daily data__. + + - `hourly_structured.py` is the script that __gathers hourly data__. + diff --git a/Web_Scraping_wetter_de_day_periods.py b/Web_Scraping_wetter_de_day_periods.py new file mode 100644 index 0000000..a09d9c1 --- /dev/null +++ b/Web_Scraping_wetter_de_day_periods.py @@ -0,0 +1,177 @@ + +# coding: utf-8 + +# In[51]: + + +from requests import get +from requests.exceptions import RequestException +from contextlib import closing +from bs4 import BeautifulSoup +#from datetime import datetime +from datetime import timedelta +import pandas as pd +import urllib3 +import datetime +import time +import os +import db_manager + +# -*- coding: utf -*- + + +# In[52]: + + +def simple_get(url): + """ + Attempts to get the content at `url` by making an HTTP GET request. + If the content-type of response is some kind of HTML/XML, return the + text content, otherwise return None + """ + try: + with closing(get(url, stream=True)) as resp: + if is_good_response(resp): + return resp.content + else: + return None + + except RequestException as e: + log_error('Error during requests to {0} : {1}'.format(url, str(e))) + return None + + +def is_good_response(resp): + """ + Returns true if the response seems to be HTML, false otherwise + """ + content_type = resp.headers['Content-Type'].lower() + return (resp.status_code == 200 + and content_type is not None + and content_type.find('html') > -1) + + +def log_error(e): + """ + It is always a good idea to log errors. + This function just prints them, but you can + make it do anything. + """ + print(e) + +def find_between(s, first, last): + try: + start = s.index(first) + len(first) + end = s.index(last, start) + return s[start:end] + except ValueError: + return "" + +def cut_string(s, cut): + try: + cut_from = s.index(cut) + len(cut) + return s[cut_from:] + except ValueError: + return "" + + +# In[53]: + + +class forecast(object): + def __init__(max_temp, min_temp, proc_date, acc_date): + self.max_temp = max_temp + self.min_temp = min_temp + self.proc_date = proc_date + self.acc_date = acc_date + +def create_weather_df(url, http, current_time): + + data = {} + soup = BeautifulSoup(http.request('GET',url).data,'lxml') + daily_periods_dict = {} + + proc_date = [] + temp = [] + rain = [] + wind = [] + condition = [] + rain_l = [] + + for day in range(15): + for h in range(4): + dt = (current_time + timedelta(days=day)).date() + proc_date.append(datetime.datetime.combine(dt,datetime.time(h*6+2)).strftime('%Y%m%d%H')) + + period_forcast = soup.findAll("div", {"class":'forecast-column column-1 wt-border-radius-6'}) + for period in period_forcast: + + temp.append(int(period.find('div', {'class':"forecast-text-temperature wt-font-light"}).text[:-1])) + condition.append(period.find('div', {'class':"forecast-column-condition"}).text) + + rain_html = period.find("div", {"class":'forecast-column-rain'}) + + r = rain_html.findAll('span', {'class':"wt-font-semibold"}) + if len(r) > 1: + rain.append(int(r[0].text[:-1])) + rain_l.append(r[1].text[:-4]) + else: + rain.append(int(rain_html.find('span', {'class':"wt-font-semibold"}).text[:-1])) + rain_l.append(None) + + wind_html = period.find("div", {"class":'forecast-column-wind'}) + wind.append(int(wind_html.find('span', {'class':"wt-font-semibold"}).text[1:-5])) + + 
daily_periods_dict['date_for_which_weather_is_predicted'] = proc_date + + daily_periods_dict['temperature'] = temp + daily_periods_dict['wind_speed'] = wind + daily_periods_dict['precipitation_per'] = rain + + daily_periods_dict['precipitation_l'] = rain_l + daily_periods_dict['condition'] = condition + + daily = pd.DataFrame(daily_periods_dict) + return daily + + +# In[54]: + + +cities=['Berlin', 'Hamburg', 'Munich', 'Cologne', 'Frankfurt_am_Main'] + +urls=['https://www.wetter.de/deutschland/wetter-berlin-18228265/wetterprognose.html', + 'https://www.wetter.de/deutschland/wetter-hamburg-18219464/wetterprognose.html', + 'https://www.wetter.de/deutschland/wetter-muenchen-18225562/wetterprognose.html', + 'https://www.wetter.de/deutschland/wetter-koeln-18220679/wetterprognose.html', + 'https://www.wetter.de/deutschland/wetter-frankfurt-18221009/wetterprognose.html'] + +http = urllib3.PoolManager() +current_time = pd.Timestamp(datetime.datetime.now()) +df = pd.DataFrame() + +for i,city in enumerate(cities): + url = urls[i] + cdf = create_weather_df(url,http,current_time) + cdf['city'] = city + df = df.append(cdf) + +df['website'] = 'https://www.wetter.de' +df['wind_direction'] = None +df['date_of_acquisition'] = current_time.strftime('%Y%m%d%H') + +# pkl_name='./wetter_de/day_periods/'+current_time.strftime('%Y%m%d%H')+'.pkl' +df.date_of_acquisition = df.date_of_acquisition.apply(lambda x: datetime.datetime.strptime(x, '%Y%m%d%H').date()) +df.date_for_which_weather_is_predicted = df.date_for_which_weather_is_predicted.apply(lambda x: datetime.datetime.strptime(x, '%Y%m%d%H%M').date()) + +#pkl_name='./wetter_de/daily/'+current_time.strftime('%Y%m%d%H')+'.pkl' +try: + db_manager.insert_df("DailyPeriodPrediction", df) +finally: + filename = os.path.expanduser('~/Documents/webscraping_2018/data_wetter_de/day_periods') + timestamp = datetime.datetime.now().strftime('%Y%m%d%H') + filename += timestamp + ".pkl" + df.to_pickle(filename) + + + diff --git a/Web_Scraping_wetter_de_full_day.py b/Web_Scraping_wetter_de_full_day.py new file mode 100644 index 0000000..be30596 --- /dev/null +++ b/Web_Scraping_wetter_de_full_day.py @@ -0,0 +1,163 @@ + +# coding: utf-8 + +# In[23]: + + +from requests import get +from requests.exceptions import RequestException +from contextlib import closing +from bs4 import BeautifulSoup +from datetime import timedelta +import pandas as pd +import urllib3 +import datetime +import time +import os +import db_manager + +# -*- coding: utf -*- + + +# In[24]: + + +def simple_get(url): + """ + Attempts to get the content at `url` by making an HTTP GET request. + If the content-type of response is some kind of HTML/XML, return the + text content, otherwise return None + """ + try: + with closing(get(url, stream=True)) as resp: + if is_good_response(resp): + return resp.content + else: + return None + + except RequestException as e: + log_error('Error during requests to {0} : {1}'.format(url, str(e))) + return None + + +def is_good_response(resp): + """ + Returns true if the response seems to be HTML, false otherwise + """ + content_type = resp.headers['Content-Type'].lower() + return (resp.status_code == 200 + and content_type is not None + and content_type.find('html') > -1) + + +def log_error(e): + """ + It is always a good idea to log errors. + This function just prints them, but you can + make it do anything. 
+ """ + print(e) + +def find_between(s, first, last): + try: + start = s.index(first) + len(first) + end = s.index(last, start) + return s[start:end] + except ValueError: + return "" + +def cut_string(s, cut): + try: + cut_from = s.index(cut) + len(cut) + return s[cut_from:] + except ValueError: + return "" + + +# In[25]: + + +class forecast(object): + def __init__(max_temp, min_temp, proc_date, acc_date): + self.max_temp = max_temp + self.min_temp = min_temp + self.proc_date = proc_date + self.acc_date = acc_date + +def create_weather_df(url, http, current_time): + + data = {} + soup = BeautifulSoup(http.request('GET',url).data,'lxml') + daily_periods_dict = {} + + proc_date = [] + temp_min = [] + temp_max = [] + condition = [] + + for day in range(15): + dt = (current_time + timedelta(days=day)).date() + proc_date.append(dt.strftime('%Y%m%d%H')) + + day_forcast = soup.findAll("div", {"class":'forecast-day'}) + for day in day_forcast: + + temps = day.find('div', {"class":'forecast-day-temperature'}) + temp_min.append(int(temps.find('span', {'class':"wt-color-temperature-max"}).text[:-1])) + temp_max.append(int(temps.find('span', {'class':"wt-color-temperature-min"}).text[:-1])) + + cond = str(day.find('div', {'class':"forecast-day-image"})) + condition.append(find_between(cond,'')) + + daily_periods_dict['date_for_which_weather_is_predicted'] = proc_date + + daily_periods_dict['temperature_min'] = temp_min + daily_periods_dict['temperature_max'] = temp_max + daily_periods_dict['condition'] = condition + + daily = pd.DataFrame(daily_periods_dict) + return daily + + +# In[26]: + + +cities=['Berlin', 'Hamburg', 'Munich', 'Cologne', 'Frankfurt_am_Main'] + +urls=['https://www.wetter.de/deutschland/wetter-berlin-18228265/wetterprognose.html', + 'https://www.wetter.de/deutschland/wetter-hamburg-18219464/wetterprognose.html', + 'https://www.wetter.de/deutschland/wetter-muenchen-18225562/wetterprognose.html', + 'https://www.wetter.de/deutschland/wetter-koeln-18220679/wetterprognose.html', + 'https://www.wetter.de/deutschland/wetter-frankfurt-18221009/wetterprognose.html'] + +http = urllib3.PoolManager() +current_time = pd.Timestamp(datetime.datetime.now()) +df = pd.DataFrame() + +for i,city in enumerate(cities): + url = urls[i] + cdf = create_weather_df(url,http,current_time) + cdf['city'] = city + df = df.append(cdf) + +df['wind_speed'] = None +df['humidity'] = None +df['precipitation_per'] = None +df['precipitation_l'] = None +df['wind_direction'] = None +df['snow'] = None +df['uvi'] = None + +df['website'] = 'https://www.wetter.de' +df['date_of_acquisition'] = current_time.strftime('%Y%m%d%H') +df.date_of_acquisition = df.date_of_acquisition.apply(lambda x: datetime.datetime.strptime(x, '%Y%m%d%H').date()) +df.date_for_which_weather_is_predicted = df.date_for_which_weather_is_predicted.apply(lambda x: datetime.datetime.strptime(x, '%Y%m%d%H%M').date()) + +#pkl_name='./wetter_de/daily/'+current_time.strftime('%Y%m%d%H')+'.pkl' +try: + db_manager.insert_df("DailyPrediction", df) +finally: + filename = os.path.expanduser('~/Documents/webscraping_2018/data_wetter_de/daily') + timestamp = datetime.datetime.now().strftime('%Y%m%d%H') + filename += timestamp + ".pkl" + df.to_pickle(filename) diff --git a/Wetter_de_scraping.py b/Wetter_de_scraping.py new file mode 100644 index 0000000..76e5282 --- /dev/null +++ b/Wetter_de_scraping.py @@ -0,0 +1,109 @@ +from bs4 import BeautifulSoup +import urllib3 +import time +import datetime +import pandas as pd +import numpy as np +import pickle +import os +import 
db_manager + + +days_to_predict = 15 +http = urllib3.PoolManager() +cities = ['Berlin','Hamburg', 'Munich', 'Cologne', 'Frankfurt'] +cities_tags = ['berlin-18228265/' ,'hamburg-18219464/', 'muenchen-18225562/', 'koeln-18220679/', 'frankfurt-18221009/'] +url_hourly_base = 'https://www.wetter.de/deutschland/wetter-' +tag_tags = ['tag-'+str(tag) for tag in range(9,days_to_predict+1)] +hourly_website_tags = ['wetterbericht-aktuell', 'wetterbericht-morgen', 'wetterbericht-uebermorgen','wetter-bericht','wettervorhersage','wetter-vorhersage','wettervorschau','wetter-vorschau'] +hourly_website_tags.extend(tag_tags) + +wind_mapping = { 'Nord': 'N', 'Ost':'E', 'West':'W', 'Süd':'S', + 'Nordost':'NE','Nordnordost':'NNE', 'Nordostost':'NEE', + 'Südost':'SE','Südsüdost':'SSE', 'Südostost':'SEE', + 'Ostnordost':'ENE', 'Ostsüdost':'ESE', + 'Nordwest':'NW', 'Nordnordwest':'NNW', 'Nordwestwest':'NWW', + 'Südwest':'SW', 'Südsüdwest':'SSW', 'Südwestwest':'SWW', + 'Westnordwest':'WNW', 'Westsüdwest':'WSW', + 'Ostnord':'EN', 'Ostostnord':'EEN', 'Ostnordnord':'ENN', + 'Westnord':'WN','Westwestnord':'WWN', 'Westnordnord':'WNN', + 'Nordostnord':'NEN', 'Nordwestnord':'NWN', + 'Ostsüd':'ES', 'Ostostsüd':'EES', 'Ostsüdsüd':'ESS', + 'Westsüd':'WS','Westwestsüd':'WWS', 'Westsüdsüd':'WSS', + 'Südostsüd':'SES', 'Südwestsüd':'SWS', + } +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + + +number_of_features = 9 #e.g. date_for_which_weather_is_predicted, cities, temperature, wind ect. +number_of_cities = len(cities) +number_of_predictions = number_of_cities*len(hourly_website_tags)*25 + +current_time_date = datetime.datetime.now().strftime('%Y%m%d%H') +hourly_dict = {} +hourly_dict['website'] = ['Wetter.de']*number_of_predictions +hourly_dict['date_of_acquisition'] = [current_time_date]*number_of_predictions + +all_features = np.empty((number_of_cities,len(hourly_website_tags),25,number_of_features), dtype=object) +for ci, city in enumerate(cities): + url_hourly_base_city = url_hourly_base+cities_tags[ci] + for i, tag in enumerate(hourly_website_tags): + url = url_hourly_base_city+tag+'.html' + soup = BeautifulSoup(http.request('GET',url).data, "html5lib") + dates_for_predicted_days = [str(datetime.date.today() + datetime.timedelta(days=i)) for i in range(days_to_predict)] + day_to_predict = dates_for_predicted_days[i].replace("-","") + hourly_info = soup.findAll('div',class_="column column-4 forecast-detail-column-1h") + for hi, info in enumerate(hourly_info): + all_features[ci][i][hi][0] = city + hour = info.find('div',class_="forecast-date").text[0:2] + prediction_for = str(day_to_predict)+str(hour) + all_features[ci][i][hi][1] = prediction_for + temp_info = info.find('div', class_="forecast-temperature") + temp = temp_info.find('span',class_="temperature").text.replace("°","") + all_features[ci][i][hi][2] = temp + wind_info = info.find('div',class_="forecast-wind") + wind = wind_info.find('span',class_="wt-font-semibold").text.split("/")[0][1:-3] + all_features[ci][i][hi][3] = wind + humidity_info = info.find('div',class_="forecast-humidity-text") + humidity = humidity_info.find('span',class_="wt-font-semibold").text.replace("%","") + all_features[ci][i][hi][4] = humidity + rain_info = info.find('div',class_="forecast-rain") + rain_perecnt = rain_info.find('span',class_="wt-font-semibold").text.replace("%","") + all_features[ci][i][hi][5] = rain_perecnt + if int(rain_perecnt) > 0: + rain_liter = rain_info.find_all('span',class_="wt-font-semibold")[-1].text.split("/")[0][0:-2] + 
all_features[ci][i][hi][6] = float(rain_liter.replace(",",".")) + else: + all_features[ci][i][hi][6] = None + wind_text_ger = wind_info.find('div',class_="forecast-wind-text").text.split("aus")[1].split("\n")[0].replace(" ","") + if wind_text_ger in wind_mapping: + wind_text = wind_mapping[wind_text_ger] + else: + wind_text = None + all_features[ci][i][hi][7] = wind_text + temp_condition = temp_info.find('span',class_="temperature-condition").text + all_features[ci][i][hi][8] = temp_condition +all_features = all_features.reshape(number_of_predictions,number_of_features) + +hourly_dict['city'] = list(all_features[:,0]) +hourly_dict['date_for_which_weather_is_predicted'] = list(all_features[:,1]) +hourly_dict['temperature'] = list(all_features[:,2]) +hourly_dict['wind_speed'] = list(all_features[:,3]) +hourly_dict['humidity'] = list(all_features[:,4]) +hourly_dict['precipitation_per'] = list(all_features[:,5]) +hourly_dict['precipitation_l'] = list(all_features[:,6]) +hourly_dict['wind_direction'] = list(all_features[:,7]) +hourly_dict['condition'] = list(all_features[:,8]) +hourly_dict['snow'] = [None]*number_of_predictions +hourly_dict['uvi'] = [None]*number_of_predictions + +df = pd.DataFrame(data=hourly_dict) +df.date_of_acquisition = df.date_of_acquisition.apply(lambda x: datetime.datetime.strptime(x, '%Y%m%d%H').date()) +df.date_for_which_weather_is_predicted = df.date_for_which_weather_is_predicted.apply(lambda x: datetime.datetime.strptime(x, '%Y%m%d%H%M').date()) +try: + db_manager.insert_df("HourlyPrediction", df) +finally: + filename = os.path.expanduser('~/Documents/webscraping_2018/data_wetter_de/hourly_period_') + timestamp = datetime.datetime.now().strftime('%Y%m%d%H') + filename += timestamp + ".pkl" + df.to_pickle(filename) diff --git a/bild_scraping.py b/bild_scraping.py new file mode 100644 index 0000000..0b8ccd5 --- /dev/null +++ b/bild_scraping.py @@ -0,0 +1,232 @@ +# coding: utf-8 +# +# Created by Pooja Subramaniam and Marc Aurel Vischer on Tue, May 8. +# Temperature is given as a tuple of daily high and low value, both in degrees Celsius as ints. +# Precipitation is given as "probability" as float. +# Wind is given as a tuple of strength in Bft (int) and direction +#(e.g. "NE" if wind _comes from_ north east). 
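# A minimal sketch of what one day's entries in the temp_dicts / prec_dicts / wind_dicts
# dictionaries built below would look like under this encoding; the city name and values
# are placeholder assumptions, not scraped data:
#
#     example_temp_day = {"Berlin": (21, 12)}    # (daily high, daily low) in degrees Celsius as ints
#     example_prec_day = {"Berlin": 0.35}        # precipitation "probability" as a float
#     example_wind_day = {"Berlin": (4, "NE")}   # (strength in Bft, direction the wind comes from)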
+ + +import urllib3 +from bs4 import BeautifulSoup +import pandas as pd +import warnings +import os +import datetime +import db_manager + +#FIRST PART: ONCE-A-DAY PREDICTIONS +#These are the urls referring directly to high, low temperature +hi_lo_url = "https://wetter.bild.de/web2014/ifr-wetter-deutschland.asp" +prec_url = "https://wetter.bild.de/web2014/ifr-niederschlag-deutschland.asp" +wind_url = "https://wetter.bild.de/web2014/ifr-windstaerken-deutschland.asp" + +#load and parse page +http = urllib3.PoolManager() +with warnings.catch_warnings(): + warnings.simplefilter("ignore", category = urllib3.exceptions.InsecureRequestWarning) + hi_lo_bs = BeautifulSoup(http.request('GET', hi_lo_url).data, "html.parser") + prec_bs = BeautifulSoup(http.request('GET',prec_url).data, "html.parser") + wind_bs = BeautifulSoup(http.request('GET',wind_url).data, "html.parser") +#print(hi_lo.prettify()) + +#EXTRACT DATA AND SAVE INTO DICTIONARIES: +#TEMPERATURE HIGH/LOW, bild has today + 5 days forecast for that +#iterate over days, extract day layer for each +temp_dicts = [] +for day in range(6): + # extract current day layer + day_layer = hi_lo_bs.find_all('div', id="wk_layer_wr{}".format(day)) + #print(day_layer[0]['id']) + if len(day_layer)!=1: + raise Exception("Found more than one layer for single day.") + + # extract all the cities from that layer + day_cities = day_layer[0].find_all('div', class_="wk_map_text") + day_dict = {} + for city in day_cities: + hi_lo_str = city.nobr.next_sibling.next_sibling + high = int(hi_lo_str.split('|')[0].split('°')[0]) + low = int(hi_lo_str.split('|')[1].split('°')[0]) + day_dict[city.nobr.string] = (high, low) + temp_dicts.append(day_dict) + +#PRECIPITATION, bild has only today + 2 days forecast for that +#iterate over days, extract day layer for each +prec_dicts = [] +for day in range(1,4): #layer 0 corresponds to next 6 hrs, layer 1 to entire current day + # extract current day layer + day_layer = prec_bs.find_all('div', id="wk_layer_wr{}".format(day)) + #print(day_layer[0]['id']) + if len(day_layer)!=1: + raise Exception("Found more than one layer for single day.") + + # extract all the cities from that layer + day_cities = day_layer[0].find_all('div', class_="wk_map_text") + day_dict = {} + for city in day_cities: + prec_str = city.nobr.next_sibling.next_sibling + prec_value = int(prec_str.split()[0])/100 + day_dict[city.nobr.string] = prec_value + prec_dicts.append(day_dict) + +#WIND, bild again has today + 5 days forecast +WIND_GER_ENG = {"w":"W", "nw":"NW", "n":"N", "no":"NE", "o":"E", "so":"SE", "s":"S", "sw":"SW"} +#iterate over days, extract day layer for each +wind_dicts = [] +for day in range(6): + # extract current day layer + day_layer = wind_bs.find_all('div', id="wk_layer_wr{}".format(day)) + #print(day_layer[0]['id']) + if len(day_layer)!=1: + raise Exception("Found more than one layer for single day.") + + # extract all the cities from that layer + day_cities = day_layer[0].find_all('div', class_="wk_map_text") + day_dict = {} + for city in day_cities: + wind_str = city.nobr.next_sibling.next_sibling + wind_strength = int(wind_str.split()[0]) + wind_symbol_url = city.parent.img['src'] + wind_direction_raw = wind_symbol_url.split('.')[0].split('/')[-1] + wind_direction = WIND_GER_ENG[wind_direction_raw] + day_dict[city.nobr.string] = (wind_strength,wind_direction) + wind_dicts.append(day_dict) + +#BUNDLE THE INDIVIDUAL DICTIONARIES INTO A SINGLE DICT, SAVE AS PD DATAFRAME +date_of_acquisition = datetime.datetime.now() #for timestamp +website = 
['Bild.de'] +#storing cities as a dictionary of german name : english name, +#so .keys() and .values() gives the list of cities in german and english respectively +cities = {"Berlin":"Berlin", "Frankfurt":"Frankfurt", "Hamburg":"Hamburg", + "Köln":"Cologne", "München":"Munich"} + +daily_dict = {'website':[], 'date_for_which_weather_is_predicted':[], 'city':[], + 'date_of_acquisition':[], 'temperature_max':[], 'temperature_min':[], + 'wind_speed':[], 'humidity':[], 'precipitation_per':[], + 'precipitation_l':[], 'wind_direction':[], 'condition':[], 'snow':[], 'uvi':[]} + + +for i,city in enumerate(cities): + for days in range(6): + daily_dict['website'].append(website) + daily_dict['date_for_which_weather_is_predicted'].append( + datetime.datetime.now().strftime('%Y%m%d%H')) + daily_dict['city'].append(cities[city]) + print((date_of_acquisition+datetime.timedelta(days))) + daily_dict['date_of_acquisition'].append( + (date_of_acquisition+datetime.timedelta(days)).strftime('%Y%m%d%H')) + + daily_dict['temperature_max'].append(temp_dicts[days][city][0]) + daily_dict['temperature_min'].append(temp_dicts[days][city][1]) + daily_dict['wind_speed'].append(wind_dicts[days][city][0]) + daily_dict['wind_direction'].append(wind_dicts[days][city][1]) + daily_dict['humidity'].append(None) + + #bild has precipitation forecasts only for the next 2 days + if days<2: + daily_dict['precipitation_per'].append(prec_dicts[days+1][city]*100) + else: + daily_dict['precipitation_per'].append(None) + + daily_dict['precipitation_l'].append(None) + daily_dict['condition'].append(None) + daily_dict['snow'].append(None) + daily_dict['uvi'].append(None) + +#convert to dataframe and save to file +df_daily = pd.DataFrame(dict([ (k,pd.Series(v)) for k,v in daily_dict.items() ])) +print(df_daily) +df_daily.date_of_acquisition = df_daily.date_of_acquisition.apply(lambda x: datetime.datetime.strptime(str(x), '%Y%m%d%H').date()) +print(df_daily.date_for_which_weather_is_predicted) +df_daily.date_for_which_weather_is_predicted = df_daily.date_for_which_weather_is_predicted.apply(lambda x: datetime.datetime.strptime(str(x), '%Y%m%d%H').date()) +try: + db_manager.insert_df("DailyPrediction", df_daily) +finally: + filename = os.path.expanduser('~/Documents/webscraping_2018/data_bild/daily/daily_') + timestamp = datetime.datetime.now().strftime('%Y%m%d%H') + filename += timestamp + ".pkl" + df_daily.to_pickle(filename) + +#SECOND PART: FOUR-TIMES-A-DAY PREDICTIONS +#scrape specified cities for morning, noon, afternoon, night, extract temperature, +# precipitation in percent and condition + +PREDICTION_TIMES = [datetime.timedelta(days=0, hours=8), #morning + datetime.timedelta(days=0, hours=14), #afternoon + datetime.timedelta(days=0, hours=20), #evening + datetime.timedelta(days=1, hours=2)] #night (tomorrow) + + +#first we need the specific url for each city +city_query_url = 'https://wetter.bild.de/web2014/vorhersage-ort.asp?id=' +city_ids_dict = {'Berlin': '10115-berlin', + 'Frankfurt': '65931-frankfurt-am-main', + 'Hamburg': '22305-hamburg', + 'Köln' : '50668-koeln', + 'München' : '80331-muenchen'} + + +#for the sake of clarity, i tried to be as consistent as possible with +#Pooja's code (daily_dict above) when it comes to saving the data as a dataframe +# +#data will be saved into this dictionary before being converted to a dataframe +daily_periods_dict = {'website':[],'date_for_which_weather_is_predicted':[], + 'city':[], 'date_of_acquisition':[], + 'temperature':[],'wind_speed':[],'precipitation_per':[], + 
'precipitation_l':[],'wind_direction':[],'condition':[]} + +for city in cities: + #parse html for each city + city_url = city_query_url + city_ids_dict[city] + city_html = http.request('GET', city_url).data.decode('utf-8') + #CAREFUL!!! there is a mistake in the website: there is a /span that doesn't have a match + #we need to remove it manually before parsing + city_html_fixed = city_html.replace("VORMITTAG","VORMITTAG") + city_bs = BeautifulSoup(city_html_fixed, "html.parser") + + #get the table containing the four-times-a-day forecast and extract the data + four_table = city_bs.find_all('table', class_='wk_forecast_tbl')[1] + # using the magic number here to index this is a bit shitty but there are several + #tables that are all of the class 'wk_forecast_tbl' + + daytimes = four_table.find_all('td', class_="wk_bottomline wk_subheader") + for i,daytime in enumerate(daytimes): + siblings = [sibling for sibling in daytime.next_siblings] + temp_raw = siblings[3] + temp = int(temp_raw.text.split('°')[0]) + condition = siblings[5].text + precip_raw = siblings[7].span.next_sibling.next_sibling.next_sibling.next_sibling + precip = int(precip_raw.split('%')[0]) + #a bit of date arithmetic here: + today_00 = datetime.datetime.combine( + datetime.date.today(), datetime.time(0,0,0)) #gives today at 00 + prediction_datetime = today_00 + PREDICTION_TIMES[i] #time delta from today 00:00 + + daily_periods_dict['website'].append(city_url) + daily_periods_dict['date_for_which_weather_is_predicted'].append( + prediction_datetime.strftime('%Y%m%d%H')) + daily_periods_dict['city'].append(city) + daily_periods_dict['date_of_acquisition'].append( + datetime.datetime.now().strftime('%Y%m%d%H')) + daily_periods_dict['temperature'].append(temp) + daily_periods_dict['wind_speed'].append(None) + daily_periods_dict['precipitation_per'].append(precip) + daily_periods_dict['precipitation_l'].append(None) + daily_periods_dict['wind_direction'].append(None) + daily_periods_dict['condition'].append(condition) + + +#convert to dataframe and save to file +df = pd.DataFrame(daily_periods_dict) +df.date_of_acquisition = df.date_of_acquisition.apply(lambda x: datetime.datetime.strptime(x, '%Y%m%d%H').date()) +df.date_for_which_weather_is_predicted = df.date_for_which_weather_is_predicted.apply(lambda x: datetime.datetime.strptime(x, '%Y%m%d%H').date()) + +try: + pass + db_manager.insert_df("DailyPeriodPrediction", df) +finally: + filename = os.path.expanduser('~/Documents/webscraping_2018/data_bild/daily_period/daily_period_') + timestamp = datetime.datetime.now().strftime('%Y%m%d%H') + filename += timestamp + ".pkl" + df.to_pickle(filename) diff --git a/city_location.py b/city_location.py new file mode 100644 index 0000000..5afbd39 --- /dev/null +++ b/city_location.py @@ -0,0 +1,28 @@ +import requests +import time +import constants + +def get_coordinates(city): + response = requests.get('https://maps.googleapis.com/maps/api/geocode/json?address=' + city) + + resp_json_payload = response.json() + return resp_json_payload['results'][0]['geometry']['location'] + + + + +def map_cities(cities): + dic = {} + for city in cities: + time.sleep(5) + print(city) + coordinates = get_coordinates(city) + dic[city] = (coordinates['lat'], coordinates['lng']) + print() + time.sleep(10) + print(dic[city]) + return dic + +#cities=['BERLIN', 'HAMBURG', 'MUNICH', 'FRANKFURT', 'COLOGNE'] +cities = constants.CITIES +print(map_cities(cities)) diff --git a/constants.py b/constants.py new file mode 100644 index 0000000..40e89e8 --- /dev/null +++ 
b/constants.py
@@ -0,0 +1,18 @@
+# File with important common constants for the API scripts
+import api_info
+
+KEY = api_info.KEY
+BASE_URL = "http://api.wunderground.com/api/"+ KEY +"/hourly10day/q/"
+DAILY_BASE_URL = "http://api.wunderground.com/api/"+ KEY +"/forecast10day/q/"
+
+FILENAME = "hourly_forecast.json"
+FOLDERNAME = "/home/danielv/Documents/webscraping_2018/data/"
+CITIES = ["BERLIN", "HAMBURG", "MUNICH", "COLOGNE", "FRANKFURT"]
+
+#Coordinates
+coordinates = { 'BERLIN': (52.52000659999999, 13.404954),
+                'MUNICH': (48.1351253, 11.5819805),
+                'HAMBURG': (53.5510846, 9.9936819),
+                'FRANKFURT': (50.1109221, 8.6821267),
+                'COLOGNE': (50.937531, 6.9602786)
+              }
diff --git a/crontab_info.txt b/crontab_info.txt
new file mode 100644
index 0000000..101cc0b
--- /dev/null
+++ b/crontab_info.txt
@@ -0,0 +1,8 @@
+0 8,20 * * * /usr/bin/python3 /home/danielv/Documents/webscraping_2018/hourly_db.py >> /home/danielv/cron_weather.log 2>&1
+0 8,20 * * * /usr/bin/python3 /home/danielv/Documents/webscraping_2018/daily_db.py >> /home/danielv/cron_weather_daily.log 2>&1
+0 2,8,14,20 * * * /home/danielv/anaconda3/bin/python /home/danielv/Documents/webscraping_2018/bild_scraping.py >> /home/danielv/cron_bild.log 2>&1
+0 8,20 * * * /home/danielv/anaconda3/bin/python /home/danielv/Documents/webscraping_2018/Wetter_de_scraping.py >> /home/danielv/cron_wetter_de.log 2>&1
+
+0 8,20 * * * /usr/bin/python3 /home/danielv/Documents/webscraping_2018/Web_Scraping_wetter_de_full_day.py >> /home/danielv/cron_wetter_full.log 2>&1
+
+0 2,8,14,20 * * * /home/danielv/anaconda3/bin/python /home/danielv/Documents/webscraping_2018/Web_Scraping_wetter_de_day_periods.py >> /home/danielv/cron_wetter_daily.log 2>&1
diff --git a/daily_db.py b/daily_db.py
new file mode 100644
index 0000000..05521db
--- /dev/null
+++ b/daily_db.py
@@ -0,0 +1,107 @@
+import requests
+import time
+import datetime
+import json
+import constants
+import pandas as pd
+import pickle
+import db_manager
+
+def get_response(query):
+    """
+    Access the Wunderground API with a GET request.
+    """
+    try:
+        response = requests.get(constants.DAILY_BASE_URL + query + ".json")
+        return response.json() if response.ok else None
+    except Exception as e:
+        raise e
+
+def extract_parameters(daily_forecast, city, data):
+    """
+    Extract parameters from the response object and store them in the data dictionary.
+    """
+    date_ = daily_forecast.get('date')
+    date_predicted = datetime.datetime.fromtimestamp(int(date_.get('epoch'))).strftime('%Y%m%d%H%M')
+    temperature_max = daily_forecast.get('high').get('celsius')
+    temperature_min = daily_forecast.get('low').get('celsius')
+    wind_speed = daily_forecast.get('avewind').get('kph')
+    humidity = daily_forecast.get('avehumidity')
+    precipitation_per = daily_forecast.get('pop')
+    wind_direction = daily_forecast.get('avewind').get('dir')
+    condition = daily_forecast.get('conditions')
+    snowcm = daily_forecast.get('snow_allday').get('cm')
+    if snowcm: snow = snowcm * 10
+    else: snow = snowcm
+    UVI = None
+    precipitation_l = None
+    website = 'The Weather Channel'
+
+    data['website'].append(website)
+    data['city'].append(city)
+    data['date_of_acquisition'].append(datetime.datetime.now().strftime('%Y%m%d%H'))
+    data['date_for_which_weather_is_predicted'].append(date_predicted)
+    data['temperature_max'].append(temperature_max)
+    data['temperature_min'].append(temperature_min)
+    data['wind_speed'].append(wind_speed)
+    data['humidity'].append(humidity)
+    data['precipitation_per'].append(precipitation_per)
+    data['precipitation_l'].append(precipitation_l)
+ data['wind_direction'].append(wind_direction) + data['condition'].append(condition) + data['snow'].append(snow) + data['uvi'].append(UVI) + return data + +def gather_daily_city(city, data): + latitude, longitude= constants.coordinates.get(city) + location = str(latitude)+ "," + str(longitude) + response = get_response(location) + iterations = 100 + while(response == None and iterations > 0): + response = get_response(location) + time.sleep(10) + iterations -= 1 + if(response == None): + return data + + daily_forecasts = response.get("forecast").get("simpleforecast").get("forecastday") + + for daily_forecast in daily_forecasts: + data = extract_parameters(daily_forecast, city, data) + return data + +def gather_daily_information(): + data = { + 'website' : [], + 'city' : [], + 'date_of_acquisition' : [], + 'date_for_which_weather_is_predicted' : [], + 'temperature_max' : [], + 'temperature_min' : [], + 'wind_speed' : [], + 'humidity' : [], + 'precipitation_per' : [], + 'precipitation_l' : [], + 'wind_direction' : [], + 'condition' : [], + 'snow' : [], + 'uvi' : [], + } + for city in constants.coordinates.keys(): + data = gather_daily_city(city, data) + + df = pd.DataFrame(data) + df.date_of_acquisition = df.date_of_acquisition.apply(lambda x: datetime.datetime.strptime(x, '%Y%m%d%H').date()) + df.date_for_which_weather_is_predicted = df.date_for_which_weather_is_predicted.apply(lambda x: datetime.datetime.strptime(x, '%Y%m%d%H%M').date()) + return df + +df = gather_daily_information() +try: + if(df.size > 0): + db_manager.insert_df("DailyPrediction", df) +finally: + if(df.size > 0): + timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M') + filename = "/home/danielv/Documents/webscraping_2018/data_daily/" + timestamp + ".pkl" + df.to_pickle(filename) diff --git a/daily_structured.py b/daily_structured.py new file mode 100644 index 0000000..cc54a0c --- /dev/null +++ b/daily_structured.py @@ -0,0 +1,101 @@ +import requests +import time +import datetime +import json +import constants +import pandas as pd +import pickle + +def get_response(query): + """ + Access wunderground API to do a get request + """ + try: + response = requests.get(constants.DAILY_BASE_URL + query+ ".json") + return response.json() if response.ok else None + except Exception as e: + raise e + +def extract_parameters(daily_forecast, city, data): + """ + Extract parameters from request object and store it on data dataFrame + """ + date_ = daily_forecast.get('date') + date_predicted = datetime.datetime.fromtimestamp(int(date_.get('epoch'))).strftime('%Y%m%d%H%M') + temperature_max = daily_forecast.get('high').get('celsius') + temperature_min = daily_forecast.get('low').get('celsius') + wind_speed = daily_forecast.get('avewind').get('kph') + humidity = daily_forecast.get('avehumidity') + precipitation_per = daily_forecast.get('pop') + wind_direction = daily_forecast.get('avewind').get('dir') + condition = daily_forecast.get('conditions') + snowcm = daily_forecast.get('snow_allday').get('cm') + if snowcm: snow = snowcm * 10 + else: snow = snowcm + UVI = None + precipitation_l = None + website = 'The Weather Channel' + + data['website'].append(website) + data['city'].append(city) + data['date_of_acquisition'].append(datetime.datetime.now().strftime('%Y%m%d%H')) + data['date_for_which_weather_is_predicted'].append(date_predicted) + data['temperature_max'].append(temperature_max) + data['temperature_min'].append(temperature_min) + data['wind_speed'].append(wind_speed) + data['humidity'].append(humidity) + 
data['precipitation_per'].append(precipitation_per ) + data['precipitation_l'].append(precipitation_l) + data['wind_direction'].append(wind_direction) + data['condition'].append(condition) + data['snow'].append(snow) + data['uvi'].append(UVI) + return data + +def gather_daily_city(city, data): + latitude, longitude= constants.coordinates.get(city) + location = str(latitude)+ "," + str(longitude) + response = get_response(location) + iterations = 100 + while(response == None and iterations > 0): + response = get_response(location) + time.sleep(10) + iterations -= 1 + if(response == None): + return data + + daily_forecasts = response.get("forecast").get("simpleforecast").get("forecastday") + + for daily_forecast in daily_forecasts: + data = extract_parameters(daily_forecast, city, data) + return data + +def gather_daily_information(): + data = { + 'website' : [], + 'city' : [], + 'date_of_acquisition' : [], + 'date_for_which_weather_is_predicted' : [], + 'temperature_max' : [], + 'temperature_min' : [], + 'wind_speed' : [], + 'humidity' : [], + 'precipitation_per' : [], + 'precipitation_l' : [], + 'wind_direction' : [], + 'condition' : [], + 'snow' : [], + 'uvi' : [], + } + for city in constants.coordinates.keys(): + data = gather_daily_city(city, data) + + df = pd.DataFrame(data) + return df + +df = gather_daily_information() + +if(df.size > 0): + timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M') + filename = "/home/danielv/Documents/webscraping_2018/data_daily/" + timestamp + ".pkl" + df.to_pickle(filename) diff --git a/database.py b/database.py new file mode 100644 index 0000000..8a9a9bf --- /dev/null +++ b/database.py @@ -0,0 +1,224 @@ +import pony.orm as porm +#import database +import datetime +#import station_names +import getpass +import pandas as pd + +from pony.orm.core import ObjectNotFound, TransactionIntegrityError + + +conn_url = 'postgresql://localhost:5432' +db = porm.Database() + +class Station(db.Entity): + stations_id = porm.PrimaryKey(int, auto=False) + von_datum = porm.Optional(datetime.date) + bis_datum = porm.Optional(datetime.date) + stationshoehe = porm.Optional(int) + geobreite = porm.Optional(float) + geolaenge = porm.Optional(float) + stationsname = porm.Required(str) + bundesland = porm.Optional(str) + measurements = porm.Set('DailyMeasurement') + + @classmethod + def in_city(cls, city_name): + return cls.select(lambda s: city_name in s.stationsname) + + +class DailyMeasurement(db.Entity): + mess_datum = porm.Required(datetime.date) + stations_id = porm.Required(int) + station = porm.Optional(Station) + qn_3 = porm.Optional(int) # quality level of next columns + fx = porm.Optional(float) + fm = porm.Optional(float) + qn_4 = porm.Optional(int) + rsk = porm.Optional(float) + rskf = porm.Optional(float) + sdk = porm.Optional(float) + shk_tag = porm.Optional(float) + nm = porm.Optional(float) + vpm = porm.Optional(float) + pm = porm.Optional(float) + tmk = porm.Optional(float) + upm = porm.Optional(float) + txk = porm.Optional(float) + tnk = porm.Optional(float) + tgk = porm.Optional(float) + + porm.PrimaryKey(mess_datum, stations_id) + + #import math + #def before_insert(self): + # for x in self._columns_: + # if isinstance(getattr(self, x), float): + # if math.isnan((getattr(self, x))): + # setattr(self, x, None) + # self.station = Station[self.stations_id] + + #def after_insert(self): + # self.station = Station[self.stations_id] + + #def after_update(self): + # self.station = Station[self.stations_id] + +class DailyPrediction(db.Entity): + id = 
porm.PrimaryKey(int, auto=True) + website = porm.Required(str) + city = porm.Required(str) + date_of_acquisition = porm.Required(datetime.date) + date_for_which_weather_is_predicted = porm.Required(datetime.date) + temperature_max = porm.Required(float) + temperature_min = porm.Required(float) + wind_speed = porm.Optional(float, nullable=True) + humidity = porm.Optional(float, nullable=True) + precipitation_per = porm.Optional(float, nullable=True) + precipitation_l = porm.Optional(float, nullable=True) + wind_direction = porm.Optional(str, 3, nullable=True) + condition = porm.Optional(str, nullable=True) + snow = porm.Optional(float, nullable=True) + UVI = porm.Optional(int, unsigned=True) + + +class HourlyPrediction(db.Entity): + id = porm.PrimaryKey(int, auto=True) + website = porm.Required(str) + city = porm.Required(str) + date_of_acquisition = porm.Required(datetime.datetime) + date_for_which_weather_is_predicted = porm.Required(datetime.datetime) + temperature = porm.Required(float) + wind_speed = porm.Optional(float) + humidity = porm.Optional(float) + precipitation_per = porm.Optional(float) + precipitation_l = porm.Optional(float) + wind_direction = porm.Optional(str, 3) + condition = porm.Optional(str) + snow = porm.Optional(float) + UVI = porm.Optional(int, unsigned=True) + + +class DailyPeriodPrediction(db.Entity): + id = porm.PrimaryKey(int, auto=True) + website = porm.Required(str) + city = porm.Required(str) + date_of_acquisition = porm.Required(datetime.datetime) + date_for_which_weather_is_predicted = porm.Required(str) + temperature = porm.Required(float) + wind_speed = porm.Optional(float) + precipitation_per = porm.Optional(float) + precipitation_l = porm.Optional(float) + wind_direction = porm.Optional(str, 3) + condition = porm.Optional(str) + + +@porm.db_session +def set_station_trigger(db): + trigger_text = ''' + create or replace function set_station() + returns trigger as ' + begin + new.station := new.stations_id; + return new; + end; + ' language plpgsql; + drop trigger if exists set_station on dailymeasurement; + create trigger set_station + before insert + on dailymeasurement + for each row + execute procedure set_station(); + ''' + + db.execute(trigger_text) + + +def set_up_connection(db, db_name, user='', password=None, host='127.0.0.1', create_tables=False): + ''' + Sets up a connection with the database server. + Set create_tables to True if the tables don't exist. 
+ ''' + if password is None: + password = getpass.getpass(prompt='postgres user password: ') + db.bind(provider='postgres', user=user, password=password, host=host, database=db_name) + db.generate_mapping(create_tables = create_tables) + global conn_url + conn_url = 'postgresql://{}:{}@{}:5432/{}'.format(user, password, host, db_name) + if create_tables: + set_station_trigger(db) + + +@porm.db_session +def _insert_without_pandas(df, table_name): + table_obj = db.entities[table_name] + pk = table_obj._pk_columns_ + + if df.index.name is None: + df_q = df.set_index(pk) + else: + df_q = df.copy() + + for i in df_q.index: + try: + table_obj[i] + except ObjectNotFound: + try: + table_obj(**{**dict(zip(pk, i)), + **df_q.loc[i].to_dict()}) + except TypeError: + table_obj(**{**{pk : i}, + **df_q.loc[i].to_dict()}) + + +@porm.db_session +def _insert_with_pandas(df, table_name, auto_id=False, overwrite=False): + indices_to_keep = [] + rows_to_delete = [] + table_obj = db.entities[table_name] + + if df.index.name is None and not auto_id: + df_q = df.set_index(table_obj._pk_columns_) + else: + df_q = df.copy() + + try: + df_q.to_sql(table_name.lower(), conn_url, if_exists='append', index=not auto_id) + except: + for i in df_q.index: + try: + row = table_obj[i] + + if overwrite: + rows_to_delete.append(row) + indices_to_keep.append(i) + + except ObjectNotFound: + indices_to_keep.append(i) + + except: + print(i) + + if overwrite: + table_obj.select(lambda x: x in rows_to_delete).delete(bulk = True) + porm.commit() + + print('starting insert') + df_to_insert = df_q.loc[indices_to_keep] + df_to_insert.to_sql(table_name.lower(), conn_url, if_exists='append', index=not auto_id) + + +@porm.db_session +def insert_into_table(df, table_name, use_pandas=True, auto_id=False, overwrite=False): + if use_pandas: + _insert_with_pandas(df, table_name, auto_id, overwrite) + else: + _insert_without_pandas(df, table_name) + + +@porm.db_session +def query_to_dataframe(query): + try: + return pd.read_sql_query(query.get_sql(), conn_url) + except: + return pd.DataFrame([o.to_dict() for o in query]) \ No newline at end of file diff --git a/db_info.py b/db_info.py new file mode 100644 index 0000000..a83cb98 --- /dev/null +++ b/db_info.py @@ -0,0 +1,5 @@ +#Credentials to log into the database + +db_name = "db_webscraping" +db_user = "webscrapers" +db_password = "bCCnw3b" diff --git a/db_manager.py b/db_manager.py new file mode 100644 index 0000000..c803111 --- /dev/null +++ b/db_manager.py @@ -0,0 +1,8 @@ +import database as db +import db_info + +db.set_up_connection(db.db, db_info.db_name, user=db_info.db_user, password=db_info.db_password) +#TODO add docstring and exceptions +def insert_df(table_name, df): + db.insert_into_table(df, table_name, auto_id=True) + diff --git a/hourly_db.py b/hourly_db.py new file mode 100644 index 0000000..83ca30a --- /dev/null +++ b/hourly_db.py @@ -0,0 +1,118 @@ +import requests +import time +import datetime +import json +import constants +import pandas as pd +import pickle +import db_manager + +def get_response(query): + """ + Access wunderground API to do a get request + """ + try: + response = requests.get(constants.BASE_URL + query+ ".json") + return response.json() if response.ok else None + except Exception as e: + raise e + + +def collect_forecast_coords(coords, city): + """ + Stores the json object corresponding to the weather forecast of city in a file. 
+ Parameters: + coords: dictionary with the city names as keys, and tuple of coordinates as value + city: name of the city in a string format + """ + latitude, longitude= constants.coordinates.get(city) + location = str(latitude)+ "," + str(longitude) + response = get_response(location) + simple_forecast = response.get("hourly_forecast") + filename = str(time.time()) + "_" + city + "_" + constants.FILENAME + f = open(filename, 'w') + json.dump(simple_forecast, f) + f.close() + +def extract_parameters(hourly_forecast, city, data): + fcttime = hourly_forecast.get('FCTTIME') + year, month, day, hour = fcttime.get('year'), fcttime.get('mon_padded'), fcttime.get('mday_padded'), fcttime.get('hour_padded') + temperature = hourly_forecast.get('temp').get('metric') + wind_speed = hourly_forecast.get('wspd').get('metric') + humidity = hourly_forecast.get('humidity') + precipitation_per = hourly_forecast.get('qpf').get('metric') #convert + wind_direction = hourly_forecast.get('wdir').get('dir') + condition = hourly_forecast.get('condition') + snow = hourly_forecast.get('snow').get('metric') + UVI = hourly_forecast.get('uvi') + precipitation_l = None + website = 'The Weather Channel' + + data['website'].append(website) + data['city'].append(city) + data['date_of_acquisition'].append(datetime.datetime.now().strftime('%Y%m%d%H')) + data['date_for_which_weather_is_predicted'].append(year + month + day + hour) + data['temperature'].append(temperature) + data['wind_speed'].append(wind_speed) + data['humidity'].append(humidity) + data['precipitation_per'].append(precipitation_per ) + data['precipitation_l'].append(precipitation_l) + data['wind_direction'].append(wind_direction) + data['condition'].append(condition) + data['snow'].append(snow) + data['uvi'].append(UVI) + return data + #df = pd.DataFrame(data, index=[0]) + +def gather_hourly_city(city, data): + latitude, longitude= constants.coordinates.get(city) + location = str(latitude)+ "," + str(longitude) + response = get_response(location) + iterations = 100 + while(response == None and iterations > 0): + response = get_response(location) + iterations -= 1 + time.sleep(10) + if(response == None): + return data + + hourly_forecasts = response.get("hourly_forecast") + + for hourly_forecast in hourly_forecasts: + data = extract_parameters(hourly_forecast, city, data) + return data + +def gather_hourly_information(): + data = { + 'website' : [], + 'city' : [], + 'date_of_acquisition' : [], + 'date_for_which_weather_is_predicted' : [], + 'temperature' : [], + 'wind_speed' : [], + 'humidity' : [], + 'precipitation_per' : [], + 'precipitation_l' : [], + 'wind_direction' : [], + 'condition' : [], + 'snow' : [], + 'uvi' : [], + } + for city in constants.coordinates.keys(): + data = gather_hourly_city(city, data) + + df = pd.DataFrame(data) + df.date_for_which_weather_is_predicted = df.date_for_which_weather_is_predicted.apply(lambda x: datetime.datetime.strptime(x, '%Y%m%d%H')) + df.date_of_acquisition = df.date_of_acquisition.apply(lambda x: datetime.datetime.strptime(x, '%Y%m%d%H')) + return df + +df = gather_hourly_information() + +try: + if(df.size > 0): + db_manager.insert_df("HourlyPrediction", df) +finally: + if(df.size > 0): + timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M') + filename = "/home/danielv/Documents/webscraping_2018/data_hourly/" + timestamp + ".pkl" + df.to_pickle(filename) diff --git a/hourly_structured.py b/hourly_structured.py new file mode 100644 index 0000000..e7f4ffa --- /dev/null +++ b/hourly_structured.py @@ -0,0 +1,111 
@@ +import requests +import time +import datetime +import json +import constants +import pandas as pd +import pickle + +def get_response(query): + """ + Access wunderground API to do a get request + """ + try: + response = requests.get(constants.BASE_URL + query+ ".json") + return response.json() if response.ok else None + except Exception as e: + raise e + + +def collect_forecast_coords(coords, city): + """ + Stores the json object corresponding to the weather forecast of city in a file. + Parameters: + coords: dictionary with the city names as keys, and tuple of coordinates as value + city: name of the city in a string format + """ + latitude, longitude= constants.coordinates.get(city) + location = str(latitude)+ "," + str(longitude) + response = get_response(location) + simple_forecast = response.get("hourly_forecast") + filename = str(time.time()) + "_" + city + "_" + constants.FILENAME + f = open(filename, 'w') + json.dump(simple_forecast, f) + f.close() + +def extract_parameters(hourly_forecast, city, data): + fcttime = hourly_forecast.get('FCTTIME') + year, month, day, hour = fcttime.get('year'), fcttime.get('mon_padded'), fcttime.get('mday_padded'), fcttime.get('hour_padded') + temperature = hourly_forecast.get('temp').get('metric') + wind_speed = hourly_forecast.get('wspd').get('metric') + humidity = hourly_forecast.get('humidity') + precipitation_per = hourly_forecast.get('qpf').get('metric') #convert + wind_direction = hourly_forecast.get('wdir').get('dir') + condition = hourly_forecast.get('condition') + snow = hourly_forecast.get('snow').get('metric') + UVI = hourly_forecast.get('uvi') + precipitation_l = None + website = 'The Weather Channel' + + data['website'].append(website) + data['city'].append(city) + data['date_of_acquisition'].append(datetime.datetime.now().strftime('%Y%m%d%H')) + data['date_for_which_weather_is_predicted'].append(year + month + day + hour) + data['temperature'].append(temperature) + data['wind_speed'].append(wind_speed) + data['humidity'].append(humidity) + data['precipitation_per'].append(precipitation_per ) + data['precipitation_l'].append(precipitation_l) + data['wind_direction'].append(wind_direction) + data['condition'].append(condition) + data['snow'].append(snow) + data['uvi'].append(UVI) + return data + #df = pd.DataFrame(data, index=[0]) + +def gather_hourly_city(city, data): + latitude, longitude= constants.coordinates.get(city) + location = str(latitude)+ "," + str(longitude) + response = get_response(location) + iterations = 100 + while(response == None and iterations > 0): + response = get_response(location) + iterations -= 1 + time.sleep(10) + if(response == None): + return data + + hourly_forecasts = response.get("hourly_forecast") + + for hourly_forecast in hourly_forecasts: + data = extract_parameters(hourly_forecast, city, data) + return data + +def gather_hourly_information(): + data = { + 'website' : [], + 'city' : [], + 'date_of_acquisition' : [], + 'date_for_which_weather_is_predicted' : [], + 'temperature' : [], + 'wind_speed' : [], + 'humidity' : [], + 'precipitation_per' : [], + 'precipitation_l' : [], + 'wind_direction' : [], + 'condition' : [], + 'snow' : [], + 'uvi' : [], + } + for city in constants.coordinates.keys(): + data = gather_hourly_city(city, data) + + df = pd.DataFrame(data) + return df + +df = gather_hourly_information() + +if(df.size > 0): + timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M') + filename = "/home/danielv/Documents/webscraping_2018/data_hourly/" + timestamp + ".pkl" + 
df.to_pickle(filename)
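
For reference, a minimal usage sketch of the persistence pattern shared by the scripts above: build a DataFrame in the expected column layout, try to insert it through `db_manager`, and always keep a pickle copy as a fallback. The row contents and the pickle path below are placeholders; the sketch assumes `database.py`, `db_manager.py` and a valid `db_info.py` are importable from the working directory and that the Postgres tables already exist.

import datetime
import pandas as pd
import db_manager  # connects to Postgres on import via database.set_up_connection()

# One placeholder row in the column layout used for the DailyPrediction table.
df = pd.DataFrame([{
    'website': 'example.org',
    'city': 'Berlin',
    'date_of_acquisition': datetime.date.today(),
    'date_for_which_weather_is_predicted': datetime.date.today(),
    'temperature_max': 21.0,
    'temperature_min': 12.0,
    'wind_speed': None, 'humidity': None, 'precipitation_per': None,
    'precipitation_l': None, 'wind_direction': None, 'condition': None,
    'snow': None, 'uvi': None,
}])

try:
    db_manager.insert_df("DailyPrediction", df)   # inserts via pandas to_sql with auto_id=True
finally:
    df.to_pickle("daily_prediction_backup.pkl")   # keep a local copy even if the insert fails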