Commit: ready-for-upload
lsollis committed Jul 19, 2024
1 parent 8e4a6b3 commit 81ee86d
Showing 2 changed files with 25 additions and 23 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -69,7 +69,7 @@
 9. Run `init_crontab.py`.
    ```sh
-   python3 /home/campusenergy/communicator-project/init_crontab.py <interval - minute, hourly, daily, weekly, or monthly> /home/campusenergy/communicator-project/db_upload.py /home/campusenergy/db_upload.log
+   python3 /home/campusenergy/communicator-project/init_crontab.py <interval - minute, hourly, daily, weekly, or monthly> /home/campusenergy/communicator-project/db_upload.py /home/campusenergy/logs/db_upload.log
    ```
 10. Check the log file to verify script execution.
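For context, `init_crontab.py` itself is not shown in this commit. A minimal sketch of what such a script might do, assuming it maps the interval keyword to a cron expression and shells out to `crontab` (the function name and schedule mapping here are illustrative, not taken from the repository):

```python
import subprocess
import sys

# Hypothetical mapping from interval keyword to cron schedule; an assumption,
# since the real init_crontab.py is not shown in this commit.
SCHEDULES = {
    'minute': '* * * * *',
    'hourly': '0 * * * *',
    'daily': '0 0 * * *',
    'weekly': '0 0 * * 0',
    'monthly': '0 0 1 * *',
}

def install_job(interval, script_path, log_path):
    schedule = SCHEDULES[interval]
    # Build the job line in the same format the README shows below.
    job = f"{schedule} /usr/bin/python3 {script_path} >> {log_path} 2>&1 # Communicator data processing job"
    # Read the existing crontab ('crontab -l' exits nonzero if none exists yet).
    current = subprocess.run(['crontab', '-l'], capture_output=True, text=True)
    existing = current.stdout if current.returncode == 0 else ''
    # Append the job and write the whole crontab back.
    updated = existing.rstrip('\n') + ('\n' if existing else '') + job + '\n'
    subprocess.run(['crontab', '-'], input=updated, text=True, check=True)

if __name__ == '__main__':
    install_job(sys.argv[1], sys.argv[2], sys.argv[3])
```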
@@ -87,7 +87,7 @@
 The job will look something like this:
    ```plaintext
-   * * * * * /usr/bin/python3 /home/campusenergy/communicator-project/db_upload.py >> /home/campusenergy/db_upload.log 2>&1 # Communicator data processing job
+   * * * * * /usr/bin/python3 /home/campusenergy/communicator-project/db_upload.py >> /home/campusenergy/logs/db_upload.log 2>&1 # Communicator data processing job
    ```
 Delete the line and save your deletion.
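If you prefer to remove the job non-interactively, a filtered rewrite of the crontab achieves the same thing. This is a generic crontab technique, not a command from the repository; check that the pattern matches only the intended line first:

```sh
crontab -l | grep -v 'communicator-project/db_upload.py' | crontab -
```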
44 changes: 23 additions & 21 deletions transform_data.py
@@ -3,40 +3,39 @@
 import os
 
 def transform_data(file_path):
-    # read the data from the CSV file, skipping the first 4 rows which contain metadata
+    # Read the data from the CSV file, skipping the first 4 rows which contain metadata
     df = pd.read_csv(file_path, skiprows=4, header=None)
 
-    # get the second row after the skipped rows which contains 'Time Stamp' in the first column and units of measure in
-    # subsequent columns
+    # Get the second row after the skipped rows which contains 'Time Stamp' in the first column and units of measure in subsequent columns
     second_row_after_skip = df.iloc[1]
-    # get the unique units of measure
+    # Get the unique units of measure
     unique_values = second_row_after_skip[1:].unique()
-    # store first row values which contain 'Time Stamp' and meter IDs
+    # Store first row values which contain 'Time Stamp' and meter IDs
     first_row = df.iloc[0]
 
-    # create a dictionary to store the transformed dataframes
+    # Create a dictionary to store the transformed dataframes
     transformed_dataframes = {}
 
-    # iterate over the unique units of measure
+    # Iterate over the unique units of measure
    for value in unique_values:
-        # select columns with the same units of measure as the current unique_value
+        # Select columns with the same units of measure as the current unique_value
         columns_to_include = df.columns[(second_row_after_skip == value).values]
-        # include the first column which contains the datetime values and the columns with the same units of measure
+        # Include the first column which contains the datetime values and the columns with the same units of measure
         columns_to_include = [0] + list(columns_to_include)
-        # save included columns to a new dataframe
+        # Save included columns to a new dataframe
         df_subset = df.iloc[[0] + list(range(2, len(df))), columns_to_include]
-        # rename the first column to 'datetime'
+        # Rename the first column to 'datetime'
         df_subset.rename(columns={0: 'datetime'}, inplace=True)
-        # rename the columns to the values in the first row
+        # Rename the columns to the values in the first row
         df_subset.columns = ['datetime'] + first_row[columns_to_include[1:]].tolist()
-        # get the datetime column
+        # Get the datetime column
         datetime_col = df_subset['datetime']
-        # melt the dataframe to have a single column for meter_id and another for meter_reading
+        # Melt the dataframe to have a single column for meter_id and another for meter_reading
         df_melted = df_subset.iloc[1:].melt(id_vars=['datetime'], var_name='meter_id', value_name='meter_reading')
         df_melted['meter_reading'] = df_melted['meter_reading'].str.strip().str.replace(r'[^\d.]+', '', regex=True)
         df_melted['meter_reading'] = pd.to_numeric(df_melted['meter_reading'], errors='coerce')
 
-        # convert the meter readings to the appropriate units
+        # Convert the meter readings to the appropriate units
         if value == 'Watts, 3-Ph total':
             df_melted['meter_reading'] = df_melted['meter_reading'] / 1000
             transformed_dataframes['kW'] = df_melted
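As an aside, the melt step above is the core of the reshape: it turns one column per meter into a long table with one reading per row. A small self-contained illustration with made-up meter IDs and readings (not project data):

```python
import pandas as pd

# Toy wide-format frame: one column per meter, readings as messy strings.
wide = pd.DataFrame({
    'datetime': ['2024-07-01 00:00', '2024-07-01 00:15'],
    'M-101': ['1,200 W', '1,350 W'],
    'M-102': ['980 W', '1,010 W'],
})

# Wide-to-long reshape: one row per (datetime, meter) pair.
long = wide.melt(id_vars=['datetime'], var_name='meter_id', value_name='meter_reading')

# Same cleanup as the script: drop non-numeric characters, then coerce.
long['meter_reading'] = long['meter_reading'].str.strip().str.replace(r'[^\d.]+', '', regex=True)
long['meter_reading'] = pd.to_numeric(long['meter_reading'], errors='coerce')
print(long)
#            datetime meter_id  meter_reading
# 0  2024-07-01 00:00    M-101           1200
# 1  2024-07-01 00:15    M-101           1350
# 2  2024-07-01 00:00    M-102            980
# 3  2024-07-01 00:15    M-102           1010
```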
@@ -46,19 +45,22 @@ def transform_data(file_path):
         else:
             transformed_dataframes[value] = df_melted
 
-    # define the output directory one level higher and named 'ready-for-upload'
+    # Define the output directory one level higher and named 'ready-for-upload'
     output_dir = os.path.abspath(os.path.join(os.path.dirname(file_path), '..', 'ready-for-upload'))
-    # create the directory if it does not exist
+    # Create the directory if it does not exist
     os.makedirs(output_dir, exist_ok=True)
 
-    # save the transformed dataframes to CSV files
+    # Save the transformed dataframes to CSV files
     output_files = []
     for key, dataframe in transformed_dataframes.items():
+        # Get the first and last datetime values
         first_datetime = dataframe['datetime'].iloc[0]
         last_datetime = dataframe['datetime'].iloc[-1]
+        # Format the datetime values to avoid invalid characters
+        first_datetime_str = pd.to_datetime(first_datetime).strftime('%Y-%m-%d_%H-%M-%S')
+        last_datetime_str = pd.to_datetime(last_datetime).strftime('%Y-%m-%d_%H-%M-%S')
-        filename = os.path.join(output_dir, f'{key}_{first_datetime}_{last_datetime}.csv')
+        # Format the filename with the unit and datetime range
+        filename = os.path.join(output_dir, f'{key}_{first_datetime_str}_{last_datetime_str}.csv')
         dataframe.to_csv(filename, index=False)
         output_files.append(filename)
         print(f"Saved DataFrame for key '{key}' to '{filename}'")
@@ -67,10 +69,10 @@ def transform_data(file_path):

 if __name__ == '__main__':
     import argparse
-    # parse the input arguments
+    # Parse the input arguments
     parser = argparse.ArgumentParser(description="Transform data from a CSV file.")
     parser.add_argument('file', type=str, help='Path to the input CSV file.')
     args = parser.parse_args()
 
-    # transform the data
+    # Transform the data
     transform_data(args.file)
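The command-line interface is unchanged by this commit, so the script is still invoked the same way; for example (the CSV path is illustrative):

```sh
python3 transform_data.py /home/campusenergy/data/meter_export.csv
```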
