Showing 7 changed files with 697 additions and 0 deletions
@@ -0,0 +1,107 @@
""" | ||
This file contains the functions used by the 'Main' file to convert the decision | ||
tree generated in Python to the structured text language of ABB's 800xA software. | ||
A description of the functionality and parameters of each function is provided | ||
within the function itself. | ||
""" | ||
|
||
def If_Elsif_Insertion(vector, filepath):
    """
    This function adds the terms 'If' and 'Elsif' in the appropriate places.
    Input parameters:
        vector = Number of vertical bars (|) in each line of the file;
        filepath = Name of the file where the adjustments will be made.
    """
    groups = [list(range(len(vector)))]
    while groups:
        current_group = groups.pop(0)
        subgroups_to_analyze = []
        for i in current_group:
            value = vector[i]
            for j in current_group:
                if i != j:
                    if vector[j] == value:
                        # Lines j and i open the two branches of the same test:
                        # the earlier line (j) becomes 'if', the later one (i)
                        # becomes 'elsif'.
                        with open(filepath, 'r') as file:
                            lines = file.readlines()
                        lines[j] = lines[j].replace("---", "if")
                        lines[i] = lines[i].replace("---", "elsif")
                        with open(filepath, 'w') as file:
                            file.write(''.join(lines))
                        # The lines between j and i form the 'if' branch and the
                        # lines after i form the 'elsif' branch; each branch is
                        # analyzed later as a new group.
                        subgroup1 = current_group[current_group.index(j) + 1:current_group.index(i)]
                        subgroup2 = current_group[current_group.index(i) + 1:]
                        subgroups_to_analyze.extend([subgroup1, subgroup2])
                        break
                    else:
                        # Each i is only compared with the first other line of
                        # the group; if the depths differ, move on to the next i.
                        break
        groups.extend(subgroups_to_analyze)
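
# Illustrative example (hypothetical file contents and values): for a depth-1
# regression tree whose vertical bars have already been removed, the file
#
#   --- med10 <= 23.50
#      --- value: [1794.51]
#   --- med10 >  23.50
#      --- value: [1821.42]
#
# has vector = [1, 2, 1, 2], and If_Elsif_Insertion(vector, filepath) turns the
# first line into 'if med10 <= 23.50' and the third into 'elsif med10 >  23.50'.
# The leaf lines are handled later by the syntax adjustments in 'Main'.
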
def Decreased_indentation(filepath):
    """
    This function analyzes line indentations to identify conditional structure
    blocks.
    Input parameters:
        filepath = Name of the file where the reading will be performed.
    Output parameters:
        lines_with_smaller_indentation = Vector containing indices of lines that
        have an indentation smaller than the previous line.
    """
    lines_with_smaller_indentation = []
    with open(filepath, 'r') as file:
        lines = file.readlines()
    for i in range(1, len(lines)):
        current_line = lines[i]
        previous_line = lines[i - 1]
        current_indentation = len(current_line) - len(current_line.lstrip())
        previous_indentation = len(previous_line) - len(previous_line.lstrip())
        if current_indentation == 0:
            # Lines that return to the leftmost level are skipped; the
            # outermost structure is closed by the single 'end_if;' appended
            # at the end of the file by 'Main'.
            pass
        elif current_indentation < previous_indentation:
            lines_with_smaller_indentation.append(i)
    return lines_with_smaller_indentation
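
# Illustrative example (hypothetical indentation profile): for a converted
# depth-2 tree whose ten lines have indentations [0, 3, 6, 3, 6, 0, 3, 6, 3, 6],
# the function returns [3, 8]; lines 3 and 8 are indented less than the line
# above them, while line 5 is skipped because it is back at the leftmost level.
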
def Insertion_terms(filepath, index):
    """
    This function identifies the position where the 'end_if' term should be
    placed.
    Input parameters:
        filepath = Name of the file where the reading will be performed;
        index = Vector that stores lines with indentation smaller than the
        previous line.
    Output parameters:
        lines_with_smaller_indices = Vector that stores the indices of lines
        where the term "end_if" should be added;
        indentation = Vector that stores the number of spaces at the beginning
        of lines stored in the index vector that have lines below with smaller
        indentation;
        tabulation = Vector that stores the number of spaces at the beginning of
        lines stored in the index vector that do not have lines below with
        smaller indentation.
    """
    with open(filepath, 'r') as original_file:
        lines = original_file.readlines()
    lines_with_smaller_indices = []
    indentation = []
    tabulation = []
    lines_limit = len(lines) - 1
    for i in range(len(lines)):
        if i in index:
            indentation_line_i = len(lines[i]) - len(lines[i].lstrip())
            # Scan downwards for the first line indented less than line i;
            # 'end_if' must be inserted right before it.
            j = i + 1
            while j < len(lines):
                analyzed_line = lines[j]
                analyzed_indentation = len(analyzed_line) - len(analyzed_line.lstrip())
                if analyzed_indentation < indentation_line_i:
                    lines_with_smaller_indices.append(j)
                    indentation.append(indentation_line_i)
                    break
                elif j == lines_limit:
                    # No line below is indented less: the corresponding
                    # 'end_if' is appended at the end of the file instead.
                    tabulation.append(indentation_line_i)
                    break
                else:
                    j += 1
    return lines_with_smaller_indices, indentation, tabulation
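
# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only; not called by 'Main'). It builds a
# scratch file that mimics a small scikit-learn regression tree after the
# vertical bars (|) have been removed, then runs the three helpers on it.
# The file name 'demo_tree.txt' and all numeric values are made-up examples.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    demo_lines = [
        "--- med10 <= 23.50\n",
        "   --- sd10 <= 0.30\n",
        "      --- value: [1780.00]\n",
        "   --- sd10 >  0.30\n",
        "      --- value: [1790.00]\n",
        "--- med10 >  23.50\n",
        "   --- sd10 <= 0.45\n",
        "      --- value: [1800.00]\n",
        "   --- sd10 >  0.45\n",
        "      --- value: [1820.00]\n",
    ]
    with open('demo_tree.txt', 'w') as demo:
        demo.write(''.join(demo_lines))
    # One entry per line: the number of '|' bars the exported line originally had.
    If_Elsif_Insertion([1, 2, 3, 2, 3, 1, 2, 3, 2, 3], 'demo_tree.txt')
    index = Decreased_indentation('demo_tree.txt')
    print(Insertion_terms('demo_tree.txt', index))
    # Lines 0, 1 and 6 now open with 'if' and lines 3, 5 and 8 with 'elsif';
    # the print above shows ([5], [3], [3]).
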
@@ -0,0 +1,177 @@
""" | ||
This file performs the conversion of a decision tree structure (classification | ||
or regression) generated in Python into a suitable version compatible with the | ||
'structured text' language used in ABB's 800xA programming software. The goal | ||
is to streamline the work of engineers in implementing decision trees in an | ||
industrial environment. | ||
This file was designed considering the syntax of decision trees generated in | ||
Python using the 'scikit-learn' library and having only a single output. | ||
Therefore, decision trees that do not meet these requirements may not be | ||
correctly converted. | ||
A brief description of how the code works: | ||
A .txt file containing the generated decision tree must be saved in the same | ||
directory as this file. To generate the .txt file, we suggest appending the | ||
following code at the end of the user-created file used to generate the | ||
decision tree. | ||
--------------------------------------------------------------------------- | ||
with open('filename.txt', 'w') as file: | ||
file.write(tree.export_text(clf)) | ||
NOTE: | ||
- Replace 'filename' with the desired name; | ||
- Replace 'clf' with the name of the variable containing the tree. | ||
--------------------------------------------------------------------------- | ||
When starting this code, the user needs to provide the following information: | ||
1- Name of the .txt file containing the decision tree; | ||
2- The name of the output variable to be written in the converted tree. | ||
After entering the information, the code within this file, along with the | ||
'Functions' file, will generate a .txt file saved in the same directory as | ||
this file. The generated file will contain a decision tree that preserves | ||
the same functionality as the original decision tree. However, it will have | ||
a syntax suitable for the 'structured text' language used by ABB's 800xA | ||
software. | ||
Additionally, another file ('tabela.txt') is generated, which contains the | ||
variables and parameters used in the code, as well as information such as | ||
'Data type', 'Direction', 'FD Port', and 'Attributes' | ||
Thus, users can directly copy the contents of the generated .txt files into | ||
the 800xA software, requiring only minimal adjustments if necessary. | ||
""" | ||
import numpy as np
import re
from Functions import If_Elsif_Insertion, Decreased_indentation, Insertion_terms


# -------- Input tree information --------

# -------- Validation of the existence of the .txt file in the folder --------
while True:
    original_name = input('Source file name: ')
    original_file = original_name + '.txt'

    try:
        # -------- Creating the target file --------
        destination_file = original_name + '_800xA' + '.txt'
        with open(original_file, 'rb') as original:
            with open(destination_file, 'wb') as destiny:
                destiny.write(original.read())

        break
    except FileNotFoundError:
        print(f"ATTENTION!\nThe file {original_file} does not exist in this folder. Please enter the original file name again.\n")

# -------- Checking the tree type (1 = classification | 2 = regression) --------

with open(destination_file, 'r') as destiny:
    content = destiny.read()
    if 'class' in content:
        tree_type = 1
    elif 'value' in content:
        tree_type = 2

# -------- Counting and removal of the vertical bars (|) --------

with open(destination_file, 'r') as file:
    lines = file.readlines()
    num_bars = np.empty(len(lines), dtype=int)
    for i in range(len(lines)):
        num_bars[i] = lines[i].count('|')
        lines[i] = lines[i].replace("|", "")
with open(destination_file, "w") as file:
    file.write(''.join(lines))

# -------- Insertion of If and Elsif --------

If_Elsif_Insertion(num_bars, destination_file)

# -------- Syntax adjustments --------

modifications = [("--- ", ""), (":", " :=")]
with open(destination_file, 'r') as file:
    content = file.read()
    for from_, to_ in modifications:
        content = content.replace(from_, to_)
with open(destination_file, 'w') as file:
    file.write(content)

# -------- Collect the names of the input variables and read the name of the output variable --------

variables = set()
with open(destination_file, 'r') as file:
    for line in file:
        if line.strip():
            # Drop the leading 'if'/'elsif' keyword, if present, and keep the
            # first remaining word (the feature tested on that line).
            stripped_line = line.strip()
            if stripped_line.startswith('elsif '):
                stripped_line = stripped_line[len('elsif '):]
            elif stripped_line.startswith('if '):
                stripped_line = stripped_line[len('if '):]
            first_word = stripped_line.split()[0]
            if first_word not in ('value', 'class'):
                variables.add(first_word)
while True:
    output_variable = input("Enter the output variable: ")
    if output_variable not in variables:
        break
    else:
        print("ERROR: The output variable should be different from the input variables. Please provide a different variable.")

# -------- Substituting the input and output variables --------

with open(destination_file, "r+") as file:
    content = file.read()
    if tree_type == 1:
        content = re.sub(r'class := (.+)', r'class := \1;', content)
        content = content.replace('class', output_variable)
    elif tree_type == 2:
        content = re.sub(r'value := \[([0-9.]+)\]', r'value := \1;', content)
        content = content.replace('value', output_variable)
    file.seek(0)
    file.write(content)
    file.truncate()

# -------- Insertion of the term 'end_if' and ';' at the end of the lines --------

index = Decreased_indentation(destination_file)
calculated_indices, indentation, tabulation = Insertion_terms(destination_file, index)

# 'tabulation' holds the indentation of blocks that are only closed at the end
# of the file: one 'end_if;' is appended for each of them, plus a final
# 'end_if;' that closes the outermost IF.
with open(destination_file, 'a') as file:
    for added_space in reversed(tabulation):
        file.write((' ' * added_space) + 'end_if;' + '\n')
    file.write('end_if;' + '\n')

# 'calculated_indices' holds the lines that must be preceded by an 'end_if;'
# written with the indentation stored in 'indentation'.
with open(destination_file, 'r') as file:
    lines = file.readlines()
    for i in range(len(calculated_indices)):
        line_index = calculated_indices[i]
        lines[line_index] = (" " * indentation[i]) + 'end_if;' + '\n' + lines[line_index]
with open(destination_file, 'w') as file:
    file.write(''.join(lines))

# -------- Inclusion of the term 'then' --------

with open(destination_file, 'r') as file:
    original_text = file.read()
modified_text = re.sub(r'if (.+)', r'if (\1) then', original_text)
with open(destination_file, 'w') as file:
    file.write(modified_text)

# -------- Construction of the variables table --------

table_file = original_name + '_table.txt'
with open(table_file, 'w') as file:
    file.write('Parameters:\n')
    for variable in variables:
        line = f'{variable}\treal\t\tin\tyes\n'
        file.write(line)
    if tree_type == 1:
        file.write(f'{output_variable}\tdint\t\tout\tyes\n')
    elif tree_type == 2:
        file.write(f'{output_variable}\treal\t\tout\tyes\n')
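
# For illustration (hypothetical variable names): with inputs 'med10' and
# 'sd10' and an output named 'out' on a regression tree, the generated table
# file contains tab-separated lines such as:
#
#   Parameters:
#   med10   real        in   yes
#   sd10    real        in   yes
#   out     real        out  yes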
@@ -0,0 +1,24 @@
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn import tree

# The CSV uses commas as decimal separators, so they are converted to dots
# before casting the columns to float.
df = pd.read_csv("mass_flow_rate_estimation.csv", header=0)
df = df.replace(',', '.', regex=True)
df = df.astype(float)

X = df[['med10', 'sd10']]
y = df['bal75']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

reg = DecisionTreeRegressor(max_depth=3)
reg.fit(X_train, y_train)

feature_names = list(X.columns)

with open('filename.txt', 'w') as file:
    file.write(tree.export_text(reg, feature_names=feature_names))
@@ -0,0 +1,37 @@
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split

# The CSV uses commas as decimal separators, so they are converted to dots
# before casting the columns to float.
df = pd.read_csv("mass_flow_rate_estimation.csv", header=0)
df = df.replace(',', '.', regex=True)
df = df.astype(float)

X = df[["med10", "sd10"]]
y = df["bal75"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

activation_function = 'tanh'
mlp = MLPRegressor(hidden_layer_sizes=(2,), activation=activation_function, max_iter=1000, random_state=42)
mlp.fit(X_train, y_train)

# Export the network structure, weights and biases to a text file.
with open('filename.txt', 'w') as file:
    num_layers = len(mlp.hidden_layer_sizes) + 2
    file.write(f"Number of layers: {num_layers}\n")
    file.write(f"Activation Function: {activation_function}\n")
    file.write(f"Activation Function of the Output Layer: {mlp.out_activation_}\n")
    file.write('\n')
    file.write("Input layer:\n")
    for i, variable in enumerate(X_train.columns):
        file.write(f" Input Neuron {i + 1}: {variable}\n")
    file.write('\n')
    for j, (layer, biases) in enumerate(zip(mlp.coefs_, mlp.intercepts_)):
        file.write(f"Layer {j + 1} - Neurons: {layer.shape[1]}\n")
        file.write("Weights:\n")
        for neuron_dest in range(layer.shape[1]):
            file.write(f"Neuron {neuron_dest}:\n")
            for neuron_src, weight in enumerate(layer[:, neuron_dest]):
                file.write(f" Weight from neuron {neuron_src}: {weight:.2f}\n")
        file.write("Biases:\n")
        for neuron, bias in enumerate(biases):
            file.write(f" Bias from neuron {neuron}: {bias:.2f}\n")
        file.write("\n")
@@ -0,0 +1,39 @@
med10,sd10,bal75
"23,04353","0,103135","1774,45"
"23,095247","0,179946","1776,8"
"23,166933","0,261907","1779,15"
"23,258586","0,344382","1781,5"
"23,370208","0,423144","1783,851"
"23,501797","0,493915","1786,201"
"23,653355","0,551948","1788,551"
"23,82488","0,591455","1790,901"
"24,016374","0,604563","1793,251"
"24,193131","0,570068","1794,511"
"24,329952","0,495926","1794,511"
"24,426837","0,405978","1794,511"
"24,483786","0,327718","1794,511"
"24,500799","0,298073","1794,511"
"24,477876","0,337185","1794,511"
"24,415016","0,419325","1794,511"
"24,312221","0,50846","1794,511"
"24,169489","0,57829","1794,511"
"23,986821","0,604563","1794,511"
"23,803863","0,578707","1796,988"
"23,650872","0,518802","1802,517"
"23,52785","0,438389","1808,046"
"23,434796","0,349183","1813,575"
"23,37171","0,26522","1819,104"
"23,338592","0,208297","1824,633"
"23,335442","0,202598","1830,162"
"23,36226","0,23939","1835,691"
"23,419046","0,283153","1841,22"
"23,5058","0,302765","1846,749"
"23,60022","0,293935","1848,422"
"23,68464","0,272566","1845,047"
"23,75906","0,242394","1841,672"
"23,82348","0,206217","1838,297"
"23,8779","0,166361","1834,923"
"23,92232","0,12495","1831,548"
"23,95674","0,084107","1828,173"
"23,98116","0,046185","1824,798"
"23,99558","0,013978","1821,423"