Skip to content

Commit

Permalink
Update exercise4.py
Browse files Browse the repository at this point in the history
  • Loading branch information
CarstenSchmotz committed Jul 3, 2023
1 parent 2427c38 commit 44cb0e4
Showing 1 changed file with 16 additions and 41 deletions.
57 changes: 16 additions & 41 deletions exercises/exercise4.py
Original file line number Diff line number Diff line change
@@ -1,56 +1,31 @@
import pandas as pd
import numpy as np
import urllib.request
import zipfile as ZipFile
from sqlalchemy import create_engine


# Exercise 4 pipeline: download the Mowesta measurement archive, extract it,
# load the temperature columns from data.csv, convert them from Celsius to
# Fahrenheit, keep only rows with positive "Geraet" and "Monat" values, and
# store the result in the "temperatures" table of temperatures.sqlite.

DATASET_URL = "https://www.mowesta.com/data/measure/mowesta-dataset-20221107.zip"
ZIP_PATH = "./exercises/exercise4.zip"
# Extract next to the zip so the CSV ends up where it is read from below.
EXTRACT_DIR = "./exercises"
# NOTE(review): assumes the archive contains data.csv at its top level - confirm.
CSV_PATH = "./exercises/data.csv"

# Columns to load from the CSV; every other column is ignored.
SELECTED_COLUMNS = [
    "Geraet",
    "Hersteller",
    "Model",
    "Monat",
    "Temperatur in °C (DWD)",
    "Batterietemperatur in °C",
    "Geraet aktiv",
]

# Shorter names for the two temperature columns.
RENAMED_COLUMNS = {
    "Temperatur in °C (DWD)": "Temperatur",
    "Batterietemperatur in °C": "Batterietemperatur",
}

# Download zip file (a single download is enough; the target directory
# ./exercises must already exist).
urllib.request.urlretrieve(DATASET_URL, ZIP_PATH)

# The zipfile module is imported under the alias `ZipFile` at the top of this
# file, so the archive class is `ZipFile.ZipFile`. The context manager closes
# the archive even if extraction fails.
with ZipFile.ZipFile(ZIP_PATH) as archive:
    archive.extractall(EXTRACT_DIR)

# Read only the selected columns. index_col=False stops pandas from using the
# first column as the index.
df = pd.read_csv(
    CSV_PATH,
    sep=";",
    decimal=",",
    index_col=False,
    usecols=SELECTED_COLUMNS,
)

df = df.rename(columns=RENAMED_COLUMNS)

# Transform data: convert both temperature columns from Celsius to Fahrenheit.
for temperature_column in ("Temperatur", "Batterietemperatur"):
    df[temperature_column] = df[temperature_column] * 9 / 5 + 32

# Keep only rows whose "Geraet" and "Monat" values are positive.
df = df[(df["Geraet"] > 0) & (df["Monat"] > 0)]

# write to sqlite database
engine = create_engine('sqlite:///./temperatures.sqlite', echo=False)
df.to_sql("temperatures", con=engine, if_exists='replace', index=False)

0 comments on commit 44cb0e4

Please sign in to comment.