From 82b147e3d40dda338c49510fdd09199b16df3b42 Mon Sep 17 00:00:00 2001
From: Carsten Schmotz
Date: Wed, 28 Jun 2023 16:55:14 +0200
Subject: [PATCH] test

---
 data/AutomatedDataPipeline.py | 47 ++++++++++++++++++
 data/tablefilter.py           | 92 +++++++++++++++++++++++++++++++++++
 2 files changed, 139 insertions(+)
 create mode 100644 data/AutomatedDataPipeline.py
 create mode 100644 data/tablefilter.py

diff --git a/data/AutomatedDataPipeline.py b/data/AutomatedDataPipeline.py
new file mode 100644
index 000000000..dfe82db26
--- /dev/null
+++ b/data/AutomatedDataPipeline.py
@@ -0,0 +1,47 @@
+import pandas as pd
+
+# Set True to download the source files; False to read local copies instead.
+downloadFiles = True
+
+# Load the car registration workbook (KBA FZ28) into a DataFrame.
+if(downloadFiles):
+    # sheet_name=4 is the fifth sheet (zero-based); skiprows=range(1,12) skips rows 1-11, keeping row 0 as header.
+    df = pd.read_excel('https://www.kba.de/SharedDocs/Downloads/DE/Statistik/Fahrzeuge/FZ28/fz28_2022_12.xlsx?__blob=publicationFile&v=4', sheet_name=4, usecols="B:P", skiprows=range(1,12))
+
+else:
+    df = pd.read_excel('/Users/carstenschmotz/Downloads/fz28_2022_09.xlsx', sheet_name=4,header=[7,8,9,10,11] )  # NOTE(review): different month and header layout than the download branch - confirm
+# Name the first five columns; assign a new list because Index objects are immutable.
+df.columns = [
+    'Monat', 'Insgesamt',
+    'Alternative Antrieb', 'Alternative in Prozent',
+    'Elektroantriebe Ingesamt',
+] + list(df.columns[5:])
+
+# Write the frame to table carregistration in CarRegistration.sqlite (replaced if present).
+df.to_sql('carregistration', 'sqlite:///./CarRegistration.sqlite', if_exists='replace', index=False)
+print("First Download DONE ")
+
+
+# Load the energy price data (written to Energyprize.sqlite below).
+if(downloadFiles):
+    df = pd.read_csv('https://www-genesis.destatis.de/genesis/downloads/00/tables/61243-0002_00.csv', sep=';', encoding="ISO-8859-1",skiprows =[0,1,2,3,4,5])
+
+else:
+    # NOTE(review): skips one row fewer and sets no encoding, unlike the download branch - confirm.
+    df = pd.read_csv('/Users/carstenschmotz/Downloads/61243-0002_00.csv', sep=';', skiprows=[0,1,2,3,4])
+
+# Name the first three columns; tablefilter.py filters on the 'Haushalte' column.
+# NOTE(review): 'Verbrauchsklassen' and 'Energie und Vertrieb' were previously assigned
+# to positions 1 and 2 and then overwritten by the names below; confirm whether they
+# were meant for later columns.
+df.columns = ['Jahr', 'Haushalte', 'Insgesamt'] + list(df.columns[3:])
+
+
+
+
+
+
+# Write the frame to table prize in Energyprize.sqlite (replaced if present).
+df.to_sql('prize', 'sqlite:///./Energyprize.sqlite', if_exists='replace', index=False)
+
+print("Second Download DONE ")
\ No newline at end of file
diff --git a/data/tablefilter.py b/data/tablefilter.py
new file mode 100644
index 000000000..0aa08b414
--- /dev/null
+++ b/data/tablefilter.py
@@ -0,0 +1,92 @@
+import pandas as pd
+import sqlite3
+from sqlalchemy.types import Integer, FLOAT,String
+
+carreg_table = "./CarRegistration.sqlite"
+energy_table = "./Energyprize.sqlite"
+
+# Reads the two databases written by AutomatedDataPipeline.py, reverses their row
+# order, and copies the filtered rows into ./data.sqlite (tables Cars and Prize).
+# NOTE(review): every run reverses the source tables again, so the script is not
+# idempotent.
+
+# Car registrations
+conn_reverse = sqlite3.connect(carreg_table)
+cursor = conn_reverse.cursor()
+
+# Reverse the row order: read by descending rowid, then replace the table.
+sql_query_reverse = '''
+SELECT * FROM carregistration
+ORDER BY rowid DESC
+'''
+df = pd.read_sql_query(sql_query_reverse, conn_reverse)
+df.to_sql('carregistration',conn_reverse, if_exists= 'replace', index= False)
+conn_reverse.close()
+
+
+
+
+
+conn = sqlite3.connect(carreg_table)
+cursor = conn.cursor()
+# CarRegistration.sqlite only holds table carregistration; Cars is created in data.sqlite below.
+# Keep only the rows whose Monat value contains 'Jahr'.
+teilstring = 'Jahr'
+sql_query = '''
+SELECT * FROM carregistration
+where Monat LIKE '%{}%'
+'''.format(teilstring)
+df = pd.read_sql_query(sql_query, conn)
+conn.close()
+
+Result = './data.sqlite'
+conn_neu = sqlite3.connect(Result)
+df.to_sql('Cars',conn_neu, if_exists= 'replace', index= False)
+conn_neu.close()
+
+print('Carfilter done')
+
+
+
+
+
+
+
+
+
+# Energy prices
+conn_prize = sqlite3.connect(energy_table)
+cursor = conn_prize.cursor()
+
+# Reverse the row order: read by descending rowid, then replace the table.
+sql_query_reverse = '''
+SELECT * FROM prize
+ORDER BY rowid DESC
+'''
+df = pd.read_sql_query(sql_query_reverse, conn_prize)
+df.to_sql('prize',conn_prize, if_exists= 'replace', index= False)
+conn_prize.close()
+
+
+
+conn = sqlite3.connect(energy_table)
+cursor = conn.cursor()
+
+# Keep only the rows where Haushalte equals 'Insgesamt'.
+sql_query = '''
+SELECT * FROM Prize
+where Haushalte = 'Insgesamt' '''
+
+df = pd.read_sql_query(sql_query, conn)
+conn.close()
+
+conn_neu = sqlite3.connect(Result)
+df.to_sql('Prize',conn_neu, if_exists= 'replace', index= False)
+conn_neu.close()
+
+print('Energyfilter done')
+
+
+
+
+