diff --git a/py_packages/build_data_vis.py b/py_packages/build_data_vis.py
index c8916b4..896aff1 100644
--- a/py_packages/build_data_vis.py
+++ b/py_packages/build_data_vis.py
@@ -18,7 +18,13 @@ def find_directory():
 
 
 def sort_bus_by_date(directory, bus_num):
-    ''' input bus_num as string with number of bus desired'''
+    ''' 
+    Creates a DataFrame of bus files sorted by the date they were retrieved. 
+    
+    Parameters:
+    - directory (str): The directory where the bus files are located.
+    - bus_num (str): The desired bus, written as "bus_" followed by the bus serial number.
+    '''
     # find directory of bus from sorted files
     bus_directory = directory + bus_num
 
@@ -44,6 +50,7 @@ def sort_bus_by_date(directory, bus_num):
                             list_of_dates.append(element)
                 except IndexError:
                     pass  # some files have no data
+                    
     # pull out the 'Date Retrieved' and @ symbol from the date column
     for i in range(len(list_of_dates)):
         date = list_of_dates[i]
@@ -62,7 +69,19 @@ def sort_bus_by_date(directory, bus_num):
 
 
 def build_bus_df(directory, bus_num, keyword):
+    '''
+    Creates a DataFrame for a desired bus that displays one of three variables 
+    (Current, Voltage, or Power) with the time, in seconds, the bus has spent 
+    in discrete intervals for the chosen variable.
+    
+    Parameters:
+    - directory (str): The directory where the files can be found.
+    - bus_num (str): The desired bus, written as "bus_" followed by the bus serial number.
+    - keyword (str): Keyword for the desired variable to display ('Current', 'Voltage', or 'Power').
+    '''
     bus_dates = sort_bus_by_date(directory, bus_num)
+
+    # Check for desired variable
     if keyword == 'Current':
         row_list = list(range(19)) + list(range(20, 960))
         index_range = list(range(0, 18)) + list(range(19, 960))
@@ -76,13 +95,15 @@ def build_bus_df(directory, bus_num, keyword):
         print("Keyword entered in error."
               "Please select from 'Current', 'Voltage', or 'Power'.")
 
-    bus_parameter = pd.DataFrame()
+    dfs = []
     for i in range(len(bus_dates)):
         file = bus_dates['Filename'].loc[i]
         file_dir = directory + bus_num + file
         tmp = pd.read_csv(file_dir, header=None, skiprows=row_list)
-        bus_parameter = bus_parameter.append(tmp)    
-        df_index = pd.read_csv(file_dir, header=0, skiprows=index_range)
+        dfs.append(tmp)
+
+    bus_parameter = pd.concat(dfs, ignore_index=True)
+    df_index = pd.read_csv(file_dir, header=0, skiprows=index_range)
     bus_parameter.columns = df_index.columns
 
     loc_parameter = bus_parameter.columns.str.contains('^Unnamed')
@@ -93,6 +114,20 @@ def build_bus_df(directory, bus_num, keyword):
 
 
 def build_module_df(directory, bus_num, module_num):
+    '''
+    Creates a DataFrame of voltages from one module with the rows sequential in time.
+    
+    Parameters:
+    - directory (str): The directory containing the bus files.
+    - bus_num (str): The desired bus directory.
+    - module_num (int): The integer module number (between 1 and 16).
+    
+    Note:
+    - The module data within the csv files is further subdivided into 12 submodules. 
+      Therefore, this function will output 12 rows for each date retrieved within the bus folder.
+      For example, running this function on bus 1 module 1 should return a DataFrame with 216 rows.
+      Rows 0-11 correspond to the first date in the bus folder, then rows 12-24 to the next date, etc.
+    '''
     bus_dates = sort_bus_by_date(directory, bus_num)
     start_row = 51 + (11+47) * (module_num - 1)
     end_row = start_row + 12
@@ -104,7 +139,7 @@ def build_module_df(directory, bus_num, module_num):
         file = bus_dates['Filename'].loc[i]
         file_dir = directory + bus_num + file
         tmp = pd.read_csv(file_dir, header=None, skiprows=row_list)
-        module_df = module_df.append(tmp)
+        module_df = pd.concat([module_df, tmp], ignore_index=True)
     df_index = pd.read_csv(file_dir, header=0, skiprows=index_range)
     module_df.columns = df_index.columns
 
@@ -114,7 +149,47 @@ def build_module_df(directory, bus_num, module_num):
     return module_df
 
 
+def build_module_temp(directory, bus_num, module_num):
+    '''
+    Creates a DataFrame of temperatures from one module with the rows sequential in time.
+    
+    Parameters:
+    - directory (str): The directory containing the bus files.
+    - bus_num (str): The desired bus directory.
+    - module_num (int): The integer module number (between 1 and 16).
+    '''
+    bus_dates = sort_bus_by_date(directory, bus_num)
+    start_row = 83 + (11+47) * (module_num - 1)
+    end_row = start_row + 2
+    row_list = list(range(start_row)) + list(range(end_row, 960))
+    index_range = list(range(82)) + list(range(83, 960))
+
+    module_temp = pd.DataFrame()
+    for i in range(len(bus_dates)):
+        file = bus_dates['Filename'].loc[i]
+        file_dir = directory + bus_num + file
+        tmp = pd.read_csv(file_dir, header=None, skiprows=row_list)
+        module_temp = pd.concat([module_temp, tmp], ignore_index=True)
+    df_index = pd.read_csv(file_dir, header=0, skiprows=index_range)
+    module_temp.columns = df_index.columns
+
+    module_temp = module_temp.loc[:, ~module_temp.columns.str.contains('^Unnamed')]
+    module_temp.reset_index(drop=True, inplace=True)
+    
+    return module_temp
+
+
 def build_module_average_df(directory, bus_num, module_num):
+    '''
+    Creates a DataFrame of voltages from one module with the rows sequential in time.
+    Similar to build_module_df, but averages the submodule data together to return 
+    only one row for each date for the module specified. 
+    
+    Parameters:
+    - directory (str): The directory containing the bus files.
+    - bus_num (str): The desired bus directory.
+    - module_num (int): The integer module number (between 1 and 16).
+    '''
     bus_dates = sort_bus_by_date(directory, bus_num)
     start_row = 51 + (11+47) * (module_num-1)
     end_row = start_row + 12
@@ -128,24 +203,42 @@ def build_module_average_df(directory, bus_num, module_num):
         tmp = pd.read_csv(file_dir, header=None, skiprows=row_list)
         tmp = tmp.dropna(axis=1)
         tmp = tmp.drop(0, axis=1)
-        tmp_ave = tmp.mean()
-        module_average_df = module_average_df.append(tmp_ave,
-                                                     ignore_index=True)
+        tmp_ave = tmp.mean().to_frame().transpose()  # Convert tmp_ave to a DataFrame with one row
+        module_average_df = pd.concat([module_average_df, tmp_ave], ignore_index=True)
 
+    # Once the loop completes, you can set the column names as before
     df_index = pd.read_csv(file_dir, header=0, skiprows=index_range)
     df_index = df_index.loc[:, ~df_index.columns.str.contains('^Unnamed')]
     module_average_df.columns = df_index.columns
     module_average_df.reset_index(drop=True, inplace=True)
+
+    # Define 'string' here or adjust it based on your data
     string = 'DateRetrieved'
-    module_average_df_final = pd.concat([module_average_df,
-                                        bus_dates[string].astype(str)],
-                                        axis=1)
+
+    # Concatenate the 'DateRetrieved' column to module_average_df_final
+    module_average_df_final = pd.concat([module_average_df, bus_dates[string].astype(str)], axis=1)
     module_average_df_final = module_average_df_final.set_index(string)
 
     return module_average_df_final
 
 
 def visualize_mod_time(directory, bus_num, module_num):
+    '''
+    Visualizes the distribution of time spent at each voltage in the voltage range 
+    for a given module. Running this function returns a graph with 12 plotted 
+    lines, one for each individual date in a given bus, where the x axis is voltage 
+    and the y axis is time in seconds. A dropdown menu allows selection of a specific 
+    date. Selected date remains in color, other dates are rendered gray. Axes are 
+    scalable by clicking & dragging or by mouse scroll.
+    
+    Parameters:
+    - directory (str): The directory where the files can be found.
+    - bus_num (str): The desired bus, written as "bus_" followed by the bus serial number.
+    - module_num (int): The integer module number (between 1 and 16).
+
+    Returns:
+    - line (Altair Chart Object): Altair chart displaying the visualization.
+    '''
     df = build_module_average_df(directory, bus_num, module_num)
     df = df.reset_index()
     data = df.melt('DateRetrieved', var_name='voltage', value_name='counts')
@@ -153,7 +246,7 @@ def visualize_mod_time(directory, bus_num, module_num):
 
     brush = alt.selection_interval(bind='scales')
     input_dropdown = alt.binding_select(options=dates)
-    selection = alt.selection_single(fields=['DateRetrieved'],
+    selection = alt.selection_point(fields=['DateRetrieved'],
                                      bind=input_dropdown,
                                      name=' ')
     color = alt.condition(selection,
@@ -165,7 +258,7 @@ def visualize_mod_time(directory, bus_num, module_num):
         y='counts:Q',
         color=color,
         tooltip='Name:N'
-    ).add_selection(
+    ).add_params(
         brush,
         selection
     )
@@ -175,9 +268,11 @@ def visualize_mod_time(directory, bus_num, module_num):
 
 def count_mod_changes(directory):
     '''
-    Counts changes in modules sequentially for a bus
-    over all CSV files for all buses.
-    Outputs dataframe that is used for heatmap visualizations.
+    Counts changes in modules sequentially for each bus over all CSV files for all buses. 
+    Outputs a DataFrame that is used for heatmap visualizations.
+    
+    Parameters:
+    - directory (str): The directory where the files sorted by bus can be found.
     '''
     keyword = 'Mfg Data (ASCII)'
     list_bus_nums = []  # To get the name of bus number folders
@@ -312,12 +407,23 @@ def count_mod_changes(directory):
 
 
 def visualise_mod_changes(directory):
+    '''
+    Uses the count_mod_changes output to produce a heatmap for all buses in the directory, 
+    indicating when the modules have been changed (with a color change indicating the module 
+    has been changed). The heatmap includes a drop-down menu to select the desired bus to view.
+    
+    Parameters:
+    - directory (str): The directory where the files can be found.
+
+    Returns:
+    - chart (Altair Chart Object): Altair chart displaying the heatmap.
+    '''
     df1 = count_mod_changes(directory)
     data = df1.melt(id_vars=['Bus', 'Module', 'Date'])
     buses = list(data['Bus'].unique())
     alt.data_transformers.disable_max_rows()
-    select_bus = alt.selection_single(
-        name='Select', fields=['Bus'], init={'Bus': 1},
+    select_bus = alt.selection_point(
+        name='Select', fields=['Bus'],
         bind=alt.binding_select(options=buses)
     )
 
@@ -325,12 +431,23 @@ def visualise_mod_changes(directory):
         x=alt.X('Date', title="Date", sort=None),
         y=alt.Y('Module', title="Module", sort=None),
         color=alt.Color('value', legend=None)
-    ).add_selection(select_bus).transform_filter(select_bus)
+    ).add_params(select_bus).transform_filter(select_bus)
 
     return chart
     
     
 def mod_change_statistics(directory):
+    '''
+    Uses the count_mod_changes output to calculate and graph statistics on how often a 
+    given module is changed across all of the bus data. This allows us to see if module 
+    position has an effect on the frequency of module failure.
+    
+    Parameters:
+    - directory (str): The directory where the files can be found.
+
+    Returns:
+    - chart (Matplotlib Axes Object): Bar chart displaying the average times each module is changed.
+    '''
     df1 = count_mod_changes(directory)
     grouped_times_changed = df1.groupby(['Bus', 'Module'], sort=None)['Change'].max()
     average_times_changed = grouped_times_changed.groupby(['Module'], sort=None).mean()
@@ -342,6 +459,12 @@ def mod_change_statistics(directory):
 
 
 def find_replaced_modules(directory):
+    '''
+    Makes a DataFrame showing the bus, module number and the number of times the module has been changed.
+    
+    Parameters:
+    - directory (str): The directory where the files can be found, sorted by bus.
+    '''
     serial_index = 17
     bus_swapped_mods = {}
     # Storing modules that have been swapped with each bus number
@@ -448,9 +571,16 @@ def find_replaced_modules(directory):
 
 def swapped_mod_dataframes(directory, serial_num, characteristic):
     '''
-    Given a module characteristic and a serial number corresponding to
-    a specific module, return dataframes for that characteristic specific to
-    the provided module for each file in which that serial number occurs.
+    Makes a DataFrame for a specific characteristic (Cell Voltage, Balancers, Temperature, Module Voltages) of
+    a desired module, containing each bus file in which the module's serial number occurs (across buses).
+    
+    Parameters:
+    - directory (str): The directory where the files can be found.
+    - serial_num (str): The serial number corresponding to a specific module.
+    - characteristic (str): The module characteristic ('cell voltages', 'balancers', 'temperatures', 'module voltages').
+
+    Returns:
+    - list_desired_dfs (list): A list of DataFrames for the provided characteristic specific to the provided module.
     '''
     serial_index = 17
     mod_num = re.sub(r'\W+', '', serial_num).upper()
diff --git a/py_packages/build_data_vis_ORIGINAL.py b/py_packages/build_data_vis_ORIGINAL.py
new file mode 100644
index 0000000..c8916b4
--- /dev/null
+++ b/py_packages/build_data_vis_ORIGINAL.py
@@ -0,0 +1,550 @@
+import altair as alt
+import csv
+import matplotlib.pyplot as plt
+import pandas as pd
+import pathlib
+import re
+from os import listdir
+
+def find_directory():
+    '''
+    Assuming your python file is in the directory containing KCM data files,
+    returns a path to that directory with an additional
+    forward slash for future concatenation processes.
+    '''
+    path = pathlib.Path().absolute()
+    directory = str(path) + '/'
+    return directory
+
+
+def sort_bus_by_date(directory, bus_num):
+    ''' input bus_num as string with number of bus desired'''
+    # find directory of bus from sorted files
+    bus_directory = directory + bus_num
+
+    # make list of all files in bus folder
+    csv_list = []
+    for file in listdir(bus_directory):
+        if file.endswith('.csv'):
+            csv_list.append(file)
+
+    # make a list of dates and initialize final columns for dataframe
+    list_of_dates = []
+    substring = 'Data retrieved'
+    cols = ['Filename', 'DateRetrieved']
+
+    for filename in csv_list:
+        with open(bus_directory + filename) as file:
+            reader = csv.reader(file)
+            for row in reader:
+                try:
+                    for element in row:
+                        if substring in element:
+                            # print(filename, '|', element)
+                            list_of_dates.append(element)
+                except IndexError:
+                    pass  # some files have no data
+    # pull out the 'Date Retrieved' and @ symbol from the date column
+    for i in range(len(list_of_dates)):
+        date = list_of_dates[i]
+        list_of_dates[i] = date[16:].replace('@', '')
+
+    # make the dataframe of filenames and dates
+    list_of_tuples = list(zip(csv_list, list_of_dates))
+    files_dates = pd.DataFrame(list_of_tuples, columns=cols)
+
+    # sort by date
+    files_dates['DateRetrieved'] = pd.to_datetime(files_dates.DateRetrieved)
+    files_dates.sort_values('DateRetrieved', inplace=True)
+    files_dates.reset_index(drop=True, inplace=True)
+
+    return files_dates
+
+
+def build_bus_df(directory, bus_num, keyword):
+    bus_dates = sort_bus_by_date(directory, bus_num)
+    if keyword == 'Current':
+        row_list = list(range(19)) + list(range(20, 960))
+        index_range = list(range(0, 18)) + list(range(19, 960))
+    elif keyword == 'Voltage':
+        row_list = list(range(23)) + list(range(24, 960))
+        index_range = list(range(0, 22)) + list(range(23, 960))
+    elif keyword == 'Power':
+        row_list = list(range(27)) + list(range(28, 960))
+        index_range = list(range(0, 26)) + list(range(27, 960))
+    else:
+        print("Keyword entered in error."
+              "Please select from 'Current', 'Voltage', or 'Power'.")
+
+    bus_parameter = pd.DataFrame()
+    for i in range(len(bus_dates)):
+        file = bus_dates['Filename'].loc[i]
+        file_dir = directory + bus_num + file
+        tmp = pd.read_csv(file_dir, header=None, skiprows=row_list)
+        bus_parameter = bus_parameter.append(tmp)    
+        df_index = pd.read_csv(file_dir, header=0, skiprows=index_range)
+    bus_parameter.columns = df_index.columns
+
+    loc_parameter = bus_parameter.columns.str.contains('^Unnamed')
+    bus_parameter = bus_parameter.loc[:, ~loc_parameter]
+    bus_parameter.reset_index(drop=True, inplace=True)
+
+    return bus_parameter
+
+
+def build_module_df(directory, bus_num, module_num):
+    bus_dates = sort_bus_by_date(directory, bus_num)
+    start_row = 51 + (11+47) * (module_num - 1)
+    end_row = start_row + 12
+    row_list = list(range(start_row)) + list(range(end_row, 960))
+    index_range = list(range(50)) + list(range(51, 960))
+
+    module_df = pd.DataFrame()
+    for i in range(len(bus_dates)):
+        file = bus_dates['Filename'].loc[i]
+        file_dir = directory + bus_num + file
+        tmp = pd.read_csv(file_dir, header=None, skiprows=row_list)
+        module_df = module_df.append(tmp)
+    df_index = pd.read_csv(file_dir, header=0, skiprows=index_range)
+    module_df.columns = df_index.columns
+
+    module_df = module_df.loc[:, ~module_df.columns.str.contains('^Unnamed')]
+    module_df.reset_index(drop=True, inplace=True)
+
+    return module_df
+
+
+def build_module_average_df(directory, bus_num, module_num):
+    bus_dates = sort_bus_by_date(directory, bus_num)
+    start_row = 51 + (11+47) * (module_num-1)
+    end_row = start_row + 12
+    row_list = list(range(start_row)) + list(range(end_row, 960))
+    index_range = list(range(50)) + list(range(51, 960))
+
+    module_average_df = pd.DataFrame()
+    for i in range(len(bus_dates)):
+        file = bus_dates['Filename'].loc[i]
+        file_dir = directory + bus_num + file
+        tmp = pd.read_csv(file_dir, header=None, skiprows=row_list)
+        tmp = tmp.dropna(axis=1)
+        tmp = tmp.drop(0, axis=1)
+        tmp_ave = tmp.mean()
+        module_average_df = module_average_df.append(tmp_ave,
+                                                     ignore_index=True)
+
+    df_index = pd.read_csv(file_dir, header=0, skiprows=index_range)
+    df_index = df_index.loc[:, ~df_index.columns.str.contains('^Unnamed')]
+    module_average_df.columns = df_index.columns
+    module_average_df.reset_index(drop=True, inplace=True)
+    string = 'DateRetrieved'
+    module_average_df_final = pd.concat([module_average_df,
+                                        bus_dates[string].astype(str)],
+                                        axis=1)
+    module_average_df_final = module_average_df_final.set_index(string)
+
+    return module_average_df_final
+
+
+def visualize_mod_time(directory, bus_num, module_num):
+    df = build_module_average_df(directory, bus_num, module_num)
+    df = df.reset_index()
+    data = df.melt('DateRetrieved', var_name='voltage', value_name='counts')
+    dates = list(data['DateRetrieved'].unique())
+
+    brush = alt.selection_interval(bind='scales')
+    input_dropdown = alt.binding_select(options=dates)
+    selection = alt.selection_single(fields=['DateRetrieved'],
+                                     bind=input_dropdown,
+                                     name=' ')
+    color = alt.condition(selection,
+                          alt.Color('DateRetrieved:N'),
+                          alt.value('lightgray'))
+
+    line = alt.Chart(data.reset_index()).mark_line().encode(
+        x='voltage:Q',
+        y='counts:Q',
+        color=color,
+        tooltip='Name:N'
+    ).add_selection(
+        brush,
+        selection
+    )
+
+    return line
+
+
+def count_mod_changes(directory):
+    '''
+    Counts changes in modules sequentially for a bus
+    over all CSV files for all buses.
+    Outputs dataframe that is used for heatmap visualizations.
+    '''
+    keyword = 'Mfg Data (ASCII)'
+    list_bus_nums = []  # To get the name of bus number folders
+    bus_to_ordered_csvs = {}
+    # Dictionary associating each bus folder
+    # with an chronologically ordered list of CSVs
+    bus_to_ordered_dates = {}
+    # Dictionary associating each bus folder
+    # with dates listed chronologically
+    file_serials = {}  # Dictionary with serial numbers for each CSV
+    list_df = []  # List of dataframes for each bus
+    column_names = ['Bus', 'Module', 'Date', 'Change']
+    num_mods = 16  # Constant number of mods
+    module_index = 8  # For grabbing module string indices later
+    bus_single = 5
+    bus_double = 6
+    last_two_chars = -2  # For grabbing last two characters
+    last_one_chars = -1  # For grabbing last character
+    mod_index = ['Module ' + str(i) for i in range(1, num_mods + 1)]
+    # Creating rows for dataframe
+    mod_change_count = {}
+    # Dictionary for number of changes,
+    # sum value for each module # as compared file to file
+    keyword = 'Mfg Data (ASCII)'  # Keyword to search for
+    for file in listdir(directory):
+        # Place this file in directory with False_files -> Keiton's code
+        if file.startswith('bus'):
+            list_bus_nums.append(file)  # Getting list of bus names
+    for bus in list_bus_nums:  # For each bus
+        ordered_dates = []
+        df = sort_bus_by_date(directory, bus + '/')
+        ordered_csv = df['Filename'].tolist()
+        ordered_unclean_dates = df['DateRetrieved'].tolist()
+        for unclean_date in ordered_unclean_dates:
+            split_results = unclean_date.strftime('%m/%d/%Y, %H:%M:%S')
+            ordered_dates.append(split_results)
+        bus_to_ordered_csvs[bus] = ordered_csv
+        # Grabbing a sorted list of CSV's for each bus folder
+        bus_to_ordered_dates[bus] = ordered_dates
+        # Grabbing a sorted list of dates for each folder
+    for bus_key in bus_to_ordered_csvs:
+        # For each bus folder (key value for bus to ordered files dictionary)
+        for mod_name in mod_index:
+            # Setting dictionary with all module count at 0 to start.
+            # Should be for each bus.
+            mod_change_count[mod_name] = [0]
+            # Add the dataframe at the end of the comparisons to the list_df
+        ordered_dates = bus_to_ordered_dates[bus_key]
+        # Grab list of dates for dataframe use later
+        ordered_csvs = bus_to_ordered_csvs[bus_key]
+        # Grab the list of ordered CSV's associated with current bus folder
+        for i in range(len(ordered_csvs)):
+            # For each file in the list of ordered CSV's
+            serial_nums = []
+            # Start with empty list of serial numbers for that file
+            with open(directory + bus_key + '/' + ordered_csvs[i]) as file:
+                # Looking through current file
+                reader = csv.reader(file)
+                for row in reader:
+                    for element in row:
+                        if keyword in element:
+                            mod_num = re.sub(r'\W+', '', element[17:]).lower()
+                            serial_nums.append(mod_num)
+                            # Grabbing serial numbers for each CSV file
+                        else:
+                            pass
+            # After you get all the serial numbers for a file
+            serial_nums.pop(0)  # Getting rid of first module number
+            file_serials[ordered_csvs[i]] = serial_nums
+            # Key: file name. Value: List of serial numbers for that file name
+
+        # At this point, we have a list of
+        # serial numbers associated with each CSV file
+        i = 0
+        while(i < len(ordered_csvs) - 1):
+            # While we are not looking at the last file
+            # (can't compare last file with anything)
+            first_mods = file_serials[ordered_csvs[i]]
+            # Gets you first list of serials
+            next_mods = file_serials[ordered_csvs[i + 1]]
+            # Get second list of serials
+            for j in range(len(first_mods)):
+                # For each index (mod #) in the list of modules
+                m_str = "Module " + str(j + 1)
+                # For first iteration, "Module 1"
+                if first_mods[j] != next_mods[j]:
+                    mod_change_count[m_str].append(mod_change_count[m_str][-1]
+                                                   + 1)
+                    # If different, append prev. count + 1
+                else:
+                    mod_change_count[m_str].append(mod_change_count[m_str][-1])
+                    # If same, just append prev. count
+            i += 1
+
+        # Now we have dictionary with count of changes per file
+        # compared for each module (16 mods)
+        num_comps = len(ordered_csvs) - 1
+        bus_num_element = ''
+        if len(bus_key) == bus_single:
+            bus_num_element = bus_key[-1]
+        elif len(bus_key) == bus_double:
+            bus_num_element = bus_key[-2:]
+        else:
+            bus_num_element = bus_key[-3:]
+            # print(bus_num_element)
+        bus_number_list = [bus_num_element for
+                           i in range((num_comps + 1) * num_mods)]
+        # To get the bus # values
+        module_labels = []
+        change_labels = []
+        mod_num_label = ''
+        for mod_label in mod_change_count.keys():
+            # For each module number 1 through 16
+            change_labels += mod_change_count[mod_label]
+            if len(mod_label) > module_index:
+                mod_num_label = mod_label[last_two_chars:]
+            else:
+                mod_num_label = mod_label[last_one_chars]
+            for i in range(num_comps + 1):
+                module_labels.append(mod_num_label)
+        date_labels = ordered_dates * num_mods
+        data_lists = [bus_number_list,
+                      module_labels,
+                      date_labels,
+                      change_labels]
+        df_dict = {}
+        for column, data_list in zip(column_names, data_lists):
+            df_dict[column] = data_list
+        df_bus_changes = pd.DataFrame(data=df_dict)
+        list_df.append(df_bus_changes)
+    return pd.concat(list_df, axis=0)
+
+
+def visualise_mod_changes(directory):
+    df1 = count_mod_changes(directory)
+    data = df1.melt(id_vars=['Bus', 'Module', 'Date'])
+    buses = list(data['Bus'].unique())
+    alt.data_transformers.disable_max_rows()
+    select_bus = alt.selection_single(
+        name='Select', fields=['Bus'], init={'Bus': 1},
+        bind=alt.binding_select(options=buses)
+    )
+
+    chart = alt.Chart(data).mark_rect(stroke='black').encode(
+        x=alt.X('Date', title="Date", sort=None),
+        y=alt.Y('Module', title="Module", sort=None),
+        color=alt.Color('value', legend=None)
+    ).add_selection(select_bus).transform_filter(select_bus)
+
+    return chart
+    
+    
+def mod_change_statistics(directory):
+    df1 = count_mod_changes(directory)
+    grouped_times_changed = df1.groupby(['Bus', 'Module'], sort=None)['Change'].max()
+    average_times_changed = grouped_times_changed.groupby(['Module'], sort=None).mean()
+    chart = average_times_changed.plot(kind='bar', figsize=(6,4), fontsize=14, colormap='viridis')
+    plt.xlabel('Module', fontsize=16)
+    plt.ylabel('Average times changed', fontsize=16)
+    
+    return chart
+
+
+def find_replaced_modules(directory):
+    serial_index = 17
+    bus_swapped_mods = {}
+    # Storing modules that have been swapped with each bus number
+    for folder in listdir(directory):
+        if folder.startswith('bus'):
+            bus = folder
+            bus_slash = folder + '/'
+            replaced_mods = set()
+            # For storing modules that are confirmed swapped in and out
+            modules_by_date = {}  # Storing every module per date
+            serial_start_end = {}
+            # Storing each unique module (from set) and
+            # their starting and end times
+            mod_set = set()
+            # A set (unordered, no repeats) of
+            # all module numbers in the bus folder
+            # case of letter (i.e. to avoid A1 != a1)
+            for file in listdir(directory + bus_slash):
+                # For each bus folder
+                df = sort_bus_by_date(directory, bus_slash)
+                # Sarah's dataframe for organized files
+                ordered_dates = []
+                # List of ordered dates per bus folder
+                ordered_csv = df['Filename'].tolist()
+                # List of sorted CSV's by date
+                ordered_unclean_dates = df['DateRetrieved'].tolist()
+                # List of sorted dates corresponding to CSVs
+                for unclean_date in ordered_unclean_dates:
+                    split_results = unclean_date.strftime('%m/%d/%Y, %H:%M:%S')
+                    ordered_dates.append(split_results)
+                    # Organized dates as strings
+
+                for i in range(len(ordered_csv)):
+                    serials_in_csv = []
+                    file_path = directory + bus_slash + ordered_csv[i]
+                    with open(file_path) as file:
+                        reader = csv.reader(file)
+                        for row in reader:
+                            for element in row:
+                                if 'Mfg Data (ASCII)' in element:
+                                    serial_num = element[serial_index:]
+                                    mod_num = re.sub(r'\W+',
+                                                     '',
+                                                     serial_num).upper()
+                                    if mod_num != '':
+                                        serials_in_csv.append(mod_num)
+                                        # Serials in CSV will be regex version
+                                        mod_set.add((mod_num, serial_num))
+                                        # Adding tuple with the regex
+                                        # and non-regex version
+                                    else:
+                                        pass
+                                else:
+                                    pass
+                    modules_by_date[ordered_dates[i]] = serials_in_csv
+                    # Date to list of serials
+                # Purpose of this section is just to
+                # get a module number associated with start and end date
+                for serial_tuple in mod_set:
+                    # Look through every serial number in bus
+                    count = 0
+                    first_date = ''
+                    latest_date = ''
+                    start_end_list = []
+                    base_serial = serial_tuple[0]  # Current serial of interest
+
+                    for each_date in ordered_dates:
+                        # For each date
+                        # (keys to current_date_serials dictionary)
+                        current_date_serials = modules_by_date[each_date]
+                        # Getting list of the serials for current CSV/date
+                        for comp_serial in current_date_serials:
+                            if base_serial == comp_serial:
+                                # Compare base serial to comparison serial
+                                count += 1
+                                latest_date = each_date
+                                if count == 1:
+                                    first_date = each_date
+                                    start_end_list.append(first_date)
+                                else:
+                                    pass
+                    start_end_list.append(latest_date)
+                    serial_start_end[serial_tuple[1]] = start_end_list
+                    # Uncleaned serial -> start and end dates
+
+# Now compare start and end dates for each module to
+# the first and last date of the ordered_csvs by date list
+                for serial_key in serial_start_end:
+                    start_end = serial_start_end[serial_key]
+                    if (start_end[0] != ordered_dates[0] and
+                            start_end[-1] != ordered_dates[-1]):
+                        replaced_mods.add(serial_key)
+                bus_swapped_mods[bus] = list(replaced_mods)
+    return {k: v for k, v in bus_swapped_mods.items() if v}
+
+# Function takes in characteristic argument.
+# Please use one of the following
+
+# 1. 'cell voltages' for acquiring data for the submodule voltages.
+# 2. 'balancers' for acquiring data for the module cell balancers.
+# 3. 'temperatures' for acquiring data for the module temperatures.
+# 4. 'module voltages' for acquiring data for the overall module voltages.
+
+
+def swapped_mod_dataframes(directory, serial_num, characteristic):
+    '''
+    Given a module characteristic and a serial number corresponding to
+    a specific module, return dataframes for that characteristic specific to
+    the provided module for each file in which that serial number occurs.
+    '''
+    serial_index = 17
+    mod_num = re.sub(r'\W+', '', serial_num).upper()
+    # Convert the serial number provided
+    keyword = 'Mfg Data'
+    index_dictionary = {
+        'cell voltages': [5, 7, 8, 20],
+        'balancers': [21, 23, 24, 36],
+        'temperatures': [37, 39, 40, 42],
+        'module voltages': [43, 45, 46, 47]
+        # Numbers correspond to the number of rows from
+        # the module number row that
+        # needs to be skipped in order to reach
+        # the following: title_row, column_row,
+        # start_index of data values, end_index of values
+    }
+
+    title_ind = 0
+    col_ind = 1
+    start = 2
+    end = 3
+    # empty_space = -2 (never used)
+    # key_list = index_dictionary.keys() (never used)
+    list_bus_nums = []
+    list_desired_dfs = []
+    for file in listdir(directory):
+        # Place this file in directory with False_files -> Keiton's code
+        if file.startswith('bus'):
+            list_bus_nums.append(file)
+            # Getting list of bus names
+    for bus in list_bus_nums:  # For each bus
+        ordered_dates = []
+        df = sort_bus_by_date(directory, bus + '/')
+        ordered_csv = df['Filename'].tolist()
+        ordered_unclean_dates = df['DateRetrieved'].tolist()
+        for unclean_date in ordered_unclean_dates:
+            split_results = unclean_date.strftime('%m/%d/%Y, %H:%M:%S')
+            ordered_dates.append(split_results)
+        for csv_file in range(len(ordered_csv)):
+            # Iterate over each csv file in the current bus folder
+            directory_path = directory + bus + '/' + ordered_csv[csv_file]
+            # Convert this to file name (for file in bus folder)
+            row_list = []
+            with open(directory_path) as file:
+                # Read in the CSV file specified as a list of rows
+                reader = csv.reader(file)
+                for row in reader:
+                    row_list.append(row)
+            for i in range(len(row_list)):
+                # For each row in row_list
+                for element in row_list[i]:
+                    if element is not None:
+                        if keyword in element:
+                            mod_num_test = re.sub(
+                                r'\W+', '', element[serial_index:]
+                            ).upper()
+                            if mod_num_test == mod_num:
+                                char_low = characteristic.lower()
+                                indices_list = index_dictionary[char_low]
+                                title = row_list[i +
+                                                 indices_list[title_ind]][0]
+                                title = title + '  ' + ordered_dates[csv_file]
+                                # Concatenate with Module Number
+                                # (key to dictionary)
+                                in_list = indices_list[col_ind]
+                                full_column_labels = row_list[i + in_list][1:]
+                                column_labels = [
+                                    element for
+                                    element in
+                                    full_column_labels if
+                                    element]
+                                data_vals = []
+                                row_labels = []
+                                initial = indices_list[start]
+                                finish = indices_list[end]
+                                for j in range(initial, finish):
+                                    full_row = row_list[i + j][1:]
+                                    clean_row = [
+                                        element for
+                                        element in
+                                        full_row if
+                                        element
+                                    ]
+                                    data_vals.append(clean_row)
+                                    row_labels.append(row_list[i + j][0])
+                                df_characteristic = pd.DataFrame(
+                                    data=data_vals,
+                                    columns=column_labels,
+                                    index=row_labels
+                                )
+                                for label in column_labels:
+                                    dr = df_characteristic[label].astype('int')
+                                    df_characteristic[label] = dr
+                                index = df_characteristic.index
+                                index.name = title
+                                list_desired_dfs.append(df_characteristic)
+    return list_desired_dfs