Commit: ready-for-upload
lsollis committed Jul 19, 2024
1 parent 8e4a6b3 commit 81ee86d
Showing 2 changed files with 25 additions and 23 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -69,7 +69,7 @@
 9. Run `init_crontab.py`.
    ```sh
-   python3 /home/campusenergy/communicator-project/init_crontab.py <interval - minute, hourly, daily, weekly, or monthly> /home/campusenergy/communicator-project/db_upload.py /home/campusenergy/db_upload.log
+   python3 /home/campusenergy/communicator-project/init_crontab.py <interval - minute, hourly, daily, weekly, or monthly> /home/campusenergy/communicator-project/db_upload.py /home/campusenergy/logs/db_upload.log
    ```
 10. Check the log file to verify script execution.
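For context, `init_crontab.py` itself is not shown in this commit. A minimal sketch of what such a script might do, assuming it maps the interval keyword to a cron expression and shells out to `crontab` (the function name and schedule mapping here are illustrative, not taken from the repository):

```python
import subprocess
import sys

# Hypothetical mapping from interval keyword to cron schedule; an assumption,
# since the real init_crontab.py is not shown in this commit.
SCHEDULES = {
    'minute': '* * * * *',
    'hourly': '0 * * * *',
    'daily': '0 0 * * *',
    'weekly': '0 0 * * 0',
    'monthly': '0 0 1 * *',
}

def install_job(interval, script_path, log_path):
    schedule = SCHEDULES[interval]
    # Build the job line in the same format the README shows below.
    job = f"{schedule} /usr/bin/python3 {script_path} >> {log_path} 2>&1 # Communicator data processing job"
    # Read the existing crontab ('crontab -l' exits nonzero if none exists yet).
    current = subprocess.run(['crontab', '-l'], capture_output=True, text=True)
    existing = current.stdout if current.returncode == 0 else ''
    # Append the job and write the whole crontab back.
    updated = existing.rstrip('\n') + ('\n' if existing else '') + job + '\n'
    subprocess.run(['crontab', '-'], input=updated, text=True, check=True)

if __name__ == '__main__':
    install_job(sys.argv[1], sys.argv[2], sys.argv[3])
```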
@@ -87,7 +87,7 @@
 The job will look something like this:
    ```plaintext
-   * * * * * /usr/bin/python3 /home/campusenergy/communicator-project/db_upload.py >> /home/campusenergy/db_upload.log 2>&1 # Communicator data processing job
+   * * * * * /usr/bin/python3 /home/campusenergy/communicator-project/db_upload.py >> /home/campusenergy/logs/db_upload.log 2>&1 # Communicator data processing job
    ```
 Delete the line and save your deletion.
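If you prefer to remove the job non-interactively, a filtered rewrite of the crontab achieves the same thing. This is a generic crontab technique, not a command from the repository; check that the pattern matches only the intended line first:

```sh
crontab -l | grep -v 'communicator-project/db_upload.py' | crontab -
```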
44 changes: 23 additions & 21 deletions transform_data.py
@@ -3,40 +3,39 @@
 import os
 
 def transform_data(file_path):
-    # read the data from the CSV file, skipping the first 4 rows which contain metadata
+    # Read the data from the CSV file, skipping the first 4 rows which contain metadata
     df = pd.read_csv(file_path, skiprows=4, header=None)
 
-    # get the second row after the skipped rows which contains 'Time Stamp' in the first column and units of measure in
-    # subsequent columns
+    # Get the second row after the skipped rows which contains 'Time Stamp' in the first column and units of measure in subsequent columns
     second_row_after_skip = df.iloc[1]
-    # get the unique units of measure
+    # Get the unique units of measure
     unique_values = second_row_after_skip[1:].unique()
-    # store first row values which contain 'Time Stamp' and meter IDs
+    # Store first row values which contain 'Time Stamp' and meter IDs
     first_row = df.iloc[0]
 
-    # create a dictionary to store the transformed dataframes
+    # Create a dictionary to store the transformed dataframes
     transformed_dataframes = {}
 
-    # iterate over the unique units of measure
+    # Iterate over the unique units of measure
    for value in unique_values:
-        # select columns with the same units of measure as the current unique_value
+        # Select columns with the same units of measure as the current unique_value
         columns_to_include = df.columns[(second_row_after_skip == value).values]
-        # include the first column which contains the datetime values and the columns with the same units of measure
+        # Include the first column which contains the datetime values and the columns with the same units of measure
         columns_to_include = [0] + list(columns_to_include)
-        # save included columns to a new dataframe
+        # Save included columns to a new dataframe
         df_subset = df.iloc[[0] + list(range(2, len(df))), columns_to_include]
-        # rename the first column to 'datetime'
+        # Rename the first column to 'datetime'
         df_subset.rename(columns={0: 'datetime'}, inplace=True)
-        # rename the columns to the values in the first row
+        # Rename the columns to the values in the first row
         df_subset.columns = ['datetime'] + first_row[columns_to_include[1:]].tolist()
-        # get the datetime column
+        # Get the datetime column
         datetime_col = df_subset['datetime']
-        # melt the dataframe to have a single column for meter_id and another for meter_reading
+        # Melt the dataframe to have a single column for meter_id and another for meter_reading
         df_melted = df_subset.iloc[1:].melt(id_vars=['datetime'], var_name='meter_id', value_name='meter_reading')
         df_melted['meter_reading'] = df_melted['meter_reading'].str.strip().str.replace(r'[^\d.]+', '', regex=True)
         df_melted['meter_reading'] = pd.to_numeric(df_melted['meter_reading'], errors='coerce')
 
-        # convert the meter readings to the appropriate units
+        # Convert the meter readings to the appropriate units
         if value == 'Watts, 3-Ph total':
             df_melted['meter_reading'] = df_melted['meter_reading'] / 1000
             transformed_dataframes['kW'] = df_melted
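As an aside, the melt step above is the core of the reshape: it turns one column per meter into a long table with one reading per row. A small self-contained illustration with made-up meter IDs and readings (not project data):

```python
import pandas as pd

# Toy wide-format frame: one column per meter, readings as messy strings.
wide = pd.DataFrame({
    'datetime': ['2024-07-01 00:00', '2024-07-01 00:15'],
    'M-101': ['1,200 W', '1,350 W'],
    'M-102': ['980 W', '1,010 W'],
})

# Wide-to-long reshape: one row per (datetime, meter) pair.
long = wide.melt(id_vars=['datetime'], var_name='meter_id', value_name='meter_reading')

# Same cleanup as the script: drop non-numeric characters, then coerce.
long['meter_reading'] = long['meter_reading'].str.strip().str.replace(r'[^\d.]+', '', regex=True)
long['meter_reading'] = pd.to_numeric(long['meter_reading'], errors='coerce')
print(long)
#            datetime meter_id  meter_reading
# 0  2024-07-01 00:00    M-101           1200
# 1  2024-07-01 00:15    M-101           1350
# 2  2024-07-01 00:00    M-102            980
# 3  2024-07-01 00:15    M-102           1010
```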
@@ -46,19 +45,22 @@ def transform_data(file_path):
         else:
             transformed_dataframes[value] = df_melted
 
-    # define the output directory one level higher and named 'ready-for-upload'
+    # Define the output directory one level higher and named 'ready-for-upload'
     output_dir = os.path.abspath(os.path.join(os.path.dirname(file_path), '..', 'ready-for-upload'))
-    # create the directory if it does not exist
+    # Create the directory if it does not exist
     os.makedirs(output_dir, exist_ok=True)
 
-    # save the transformed dataframes to CSV files
+    # Save the transformed dataframes to CSV files
     output_files = []
     for key, dataframe in transformed_dataframes.items():
+        # Get the first and last datetime values
         first_datetime = dataframe['datetime'].iloc[0]
         last_datetime = dataframe['datetime'].iloc[-1]
+        # Format the datetime values to avoid invalid characters
+        first_datetime_str = pd.to_datetime(first_datetime).strftime('%Y-%m-%d_%H-%M-%S')
+        last_datetime_str = pd.to_datetime(last_datetime).strftime('%Y-%m-%d_%H-%M-%S')
-        filename = os.path.join(output_dir, f'{key}_{first_datetime}_{last_datetime}.csv')
+        # Format the filename with the unit and datetime range
+        filename = os.path.join(output_dir, f'{key}_{first_datetime_str}_{last_datetime_str}.csv')
         dataframe.to_csv(filename, index=False)
         output_files.append(filename)
         print(f"Saved DataFrame for key '{key}' to '{filename}'")
@@ -67,10 +69,10 @@ def transform_data(file_path):

 if __name__ == '__main__':
     import argparse
-    # parse the input arguments
+    # Parse the input arguments
     parser = argparse.ArgumentParser(description="Transform data from a CSV file.")
     parser.add_argument('file', type=str, help='Path to the input CSV file.')
     args = parser.parse_args()
 
-    # transform the data
+    # Transform the data
     transform_data(args.file)
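The command-line interface is unchanged by this commit, so the script is still invoked the same way; for example (the CSV path is illustrative):

```sh
python3 transform_data.py /home/campusenergy/data/meter_export.csv
```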
