diff --git a/Dockerfile b/Dockerfile
index f22d225..7ef8ecb 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -22,7 +22,8 @@ RUN apt-get update && apt-get install -y \
     gffread \
     sudo \
     g++ \
-    make
+    make \
+    git
 
 RUN apt-get install -y python3
 
@@ -83,8 +84,6 @@ ENV PATH=$PATH:/app/KaKs_Calculator3.0/src:/app/ParaAT2.0:/app/OrthoFinder
 
 RUN mkdir input
 
-
-
 VOLUME /app/input
 # CMD python HGT.py -i /data/bioinf2023/PlantPath2023/genomeANDgff -OFr /data/bioinf2023/PlantPath2023/genomeANDgff/results/prot/f7b812/Results_Feb23  -v -nt 50
 #ENTRYPOINT ["python","./HGT.py", "-i", "input"]
diff --git a/dualHGT.py b/dualHGT.py
index 7b95517..54ab7bf 100644
--- a/dualHGT.py
+++ b/dualHGT.py
@@ -60,6 +60,7 @@ def arguments():
 
     return (args)
 
+
 def prepare_fasta_input(arg):
     """
 
@@ -107,7 +108,7 @@ def prepare_fasta_input(arg):
 
             species_association = {}
 
-            if cds_seq.seq[0:3] != 'ATG': # Check if the sequence starts with the canonical start codon
+            if cds_seq.seq[0:3] != 'ATG':  # Check if the sequence starts with the canonical start codon
                 # if not, add it to the list of irregular proteins
                 irregular_proteins.append(str(cds_seq.id))
 
@@ -138,11 +139,13 @@ def prepare_fasta_input(arg):
         with open(res_cds_file, "w") as Ffilecds:
             SeqIO.write(cds_seqlist, Ffilecds, "fasta")
 
-    gene_association_file, irregular_proteins_file = create_g_ass_and_irr_prot_files(res_path, irregular_proteins, gene_association)
+    gene_association_file, irregular_proteins_file = create_g_ass_and_irr_prot_files(res_path, irregular_proteins,
+                                                                                     gene_association)
     cds_all_file, prot_all_file = create_collection_file(res_path)
 
     return (prot_path, prot_all_file, cds_all_file, gene_association_file, irregular_proteins_file)
 
+
 def prepare_gff_input(arg):
     """
     Reads GFF files, modifies them, and performs file operations.
@@ -212,8 +215,8 @@ def prepare_gff_input(arg):
     #   the irregular protein filepath
     return prot_path, prot_all_file, cds_all_file, gene_association_file, irregular_proteins_file
 
-def create_collection_file(res_path):
 
+def create_collection_file(res_path):
     prot_path = os.path.join(res_path, "prot")
     cds_path = os.path.join(res_path, "cds")
 
@@ -228,6 +231,7 @@ def create_collection_file(res_path):
 
     return cds_all_file, prot_all_file
 
+
 def pool_fastamod(pair: Tuple[Bio.SeqIO.SeqRecord, Bio.SeqIO.SeqRecord]) -> Tuple[
     SeqRecord, SeqRecord, Dict, bool, str]:  # parallelized function to modify the fasta files.
 
@@ -238,7 +242,7 @@ def pool_fastamod(pair: Tuple[Bio.SeqIO.SeqRecord, Bio.SeqIO.SeqRecord]) -> Tupl
     try:
         random_name = aa.description.split("HGT=")[1]
     except IndexError:
-        #print(
+        # print(
         #    f"Error: Protein (ID = {aa.id}) has no HGT tag in the description. \nAssigning a random gene tag.")
         random_code = binascii.hexlify(os.urandom(10)).decode("utf8")
         random_name = f"gene_{random_code}"
@@ -258,6 +262,7 @@ def pool_fastamod(pair: Tuple[Bio.SeqIO.SeqRecord, Bio.SeqIO.SeqRecord]) -> Tupl
         irregular = True  # a flag to collect all irregular proteins
     return aa, cds, gene_association, irregular, random_code
 
+
 def run_gffread(arg, file_matched):
     """
     Run and parse the output of GFFREAD for each species' gff-fasta pair.
@@ -319,7 +324,8 @@ def run_gffread(arg, file_matched):
                 g_ass['species'] = res_name
                 gene_association[random_code] = g_ass
                 if irregular:
-                    irregular_proteins.append(f"{gene_association[random_code]['species']}_{gene_association[random_code]['id']}")
+                    irregular_proteins.append(
+                        f"{gene_association[random_code]['species']}_{gene_association[random_code]['id']}")
 
         with open((res_prot_file + "_mod.fasta"), "w") as Ffileaa, open((res_cds_file + "_mod.fasta"), "w") as Ffilecds:
             SeqIO.write(aa_seqlist, Ffileaa, "fasta")
@@ -328,18 +334,19 @@ def run_gffread(arg, file_matched):
         os.remove(res_cds_file)
         os.remove(res_prot_file)
 
-    gene_association_file, irregular_proteins_file = create_g_ass_and_irr_prot_files(res_path, irregular_proteins, gene_association)
+    gene_association_file, irregular_proteins_file = create_g_ass_and_irr_prot_files(res_path, irregular_proteins,
+                                                                                     gene_association)
 
     return gene_association_file, irregular_proteins_file
 
-def create_g_ass_and_irr_prot_files(res_path, irregular_proteins, gene_association):
 
+def create_g_ass_and_irr_prot_files(res_path, irregular_proteins, gene_association):
     gene_association_file = os.path.join(res_path, "gene_association.txt")
 
     with open(gene_association_file, "w") as GeneAssociationFile:
         for k, v in gene_association.items():
             # writing like: random_code - original id - species
-            GeneAssociationFile.write(str(k) + '\t'+ str(v['id']) + '\t'+ str(v['species']) + '\n')
+            GeneAssociationFile.write(str(k) + '\t' + str(v['id']) + '\t' + str(v['species']) + '\n')
 
     irregular_proteins_file = os.path.join(res_path, "irregular_proteins.txt")
 
@@ -348,6 +355,7 @@ def create_g_ass_and_irr_prot_files(res_path, irregular_proteins, gene_associati
 
     return gene_association_file, irregular_proteins_file
 
+
 def extract_pairs_from_files(res_cds_file, res_prot_file):
     iterator = []
 
@@ -359,6 +367,7 @@ def extract_pairs_from_files(res_cds_file, res_prot_file):
                 break
     return iterator
 
+
 def parse_gff(folder):
     gene_association = {}  # gene_association is a dict used to save the mapping between the tag and the actual gene name
 
@@ -382,17 +391,18 @@ def parse_gff(folder):
 
     return gene_association
 
+
 def run_orthofinder(prot_path, arg):
     """
     Runs Orthofinder on the specified directory and returns the path to the results folder.
-    
+
     Args:
     -----
         `inputPath (str)`: The path to the directory containing the proteome files (.fasta format).
         `numberThreads (int)`: The number of threads to use for the Orthofinder run.
         `extra (str)`: Extra arguments to pass in the OrthoFinder command.
         `verbose (bool)`: If True, prints the output of the Orthofinder command.
-        
+
     Returns:
     --------
         str: The path to the OrthoFinder results folder.
@@ -424,6 +434,7 @@ def run_orthofinder(prot_path, arg):
 
     return orthofinder_output_folder
 
+
 def getSpNames(ResultsPath):
     """
     Retrieve the names of the species from the species tree file, created by OrthoFinder.
@@ -440,15 +451,16 @@ def getSpNames(ResultsPath):
         # speciesNames è una lista di stringhe con i nomi delle specie
         return speciesNames
 
+
 def read_tree(info):
     """
     Reads the a genetree file and returns a list containing gene pairs, their orthogroup, and their distance.
-    
+
     ## Args:
         info (tuple): A tuple containing the path to the genetree file and the species list.
     ## Returns:
         list: A list containing gene pairs and their distances. This list corresponds to a row of the final dataframe used to estimate HGT occurrence.
-    
+
     This function is called in `parseOrthofinder()` and executed in parallel with mp.Pool()
     """
 
@@ -482,17 +494,18 @@ def read_tree(info):
 
     return distances
 
+
 def parseOrthofinder(ResultsPath: str, threads: int) -> list:
     """
     A function to parse the Orthofinder results and return a list of entries containing the gene pairs and their distances.
-    
+
     ## Args:
         ResultsPath (str): The path to the Orthofinder results folder.
         threads (int): The number of threads to use for the parsing.
     ## Returns:
         list: Data containing the gene pairs and their distances. The data comes in the form of a list of lists.
         each list is to be considered as a data entry and will contain these columns:
-            `gene1` | `gene2` | `dist` inferred by OrthoFinder | `OG` | `type` = "tree" 
+            `gene1` | `gene2` | `dist` inferred by OrthoFinder | `OG` | `type` = "tree"
     """
 
     # retrieve the species names from the species tree file
@@ -516,10 +529,11 @@ def parseOrthofinder(ResultsPath: str, threads: int) -> list:
 
     return distances
 
+
 def kaksparallel(file: str) -> list:
     """
     Runs the KaKs calculator program in the shell.
-    
+
     ## Args:
         file (str): The path to the .axt file to be processed.
     ## Returns:
@@ -533,9 +547,9 @@ def kaksparallel(file: str) -> list:
     # run the 'KAKS' command in the shell
     output = file + ".kaks"
     if not os.path.exists(output) or not os.path.getsize(output) > 0:
-        runkaks = KAKS % (file,  #the .axt file passed as input
+        runkaks = KAKS % (file,  # the .axt file passed as input
                           output,  # the output file
-                          "MA")  # NG is the model used for the calculation
+                          "MA")  # Model Averaging is used for the calculation
         run = subprocess.Popen(runkaks, shell=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
 
         out, err = run.communicate()
@@ -550,6 +564,7 @@ def kaksparallel(file: str) -> list:
 
     return list_entry
 
+
 def read_kaks_file(kaks_filename: str) -> list:
     with open(kaks_filename, newline='') as resultskaks:
         next(resultskaks)
@@ -557,9 +572,10 @@ def read_kaks_file(kaks_filename: str) -> list:
             if line.strip():
                 # take the first and fourth elements of the file
                 # corresponding to the sequence name and the Ks value
-                list_entry = [line.split('\t')[i] for i in [0, 4]]
+                list_entry = [line.split('\t')[i] for i in [0, 3]]
     return list_entry
 
+
 def parseKaKs(arg, ResultsPath, proteinfilefinal, cdsfilefinal):
     """
     A function to combine the gene pairs from the Orthofinder results and the KaKs distances.
@@ -658,7 +674,6 @@ def parseKaKs(arg, ResultsPath, proteinfilefinal, cdsfilefinal):
         with mp.Pool(arg.numberThreads) as p:
             with tqdm.tqdm(total=len(axtFiles), desc="Running KaKs Calculator...") as pbar:
                 for x in p.imap_unordered(kaksparallel, axtFiles):
-                    
                     # x is a list that looks like this:
                     #   ['seq_(pair?)_name', 'Ks']
 
@@ -699,18 +714,19 @@ def parseKaKs(arg, ResultsPath, proteinfilefinal, cdsfilefinal):
 
     return distances
 
+
 def append_species(entry_list, gene_association):
     """
     Transform an entry list (a list of rows) into a canonical pd.DataFrame.
     Add to the dataframe the information regarding the species to which the genes examined belong to
-    
+
     Args:
     -----
     entry_list: list
         A list of lists containing the gene pairs and their distances.
     gene_association: dict
         A dictionary containing the gene names and the species to which they belong.
-    
+
     Returns:
     --------
     pd.DataFrame
@@ -743,6 +759,7 @@ def append_species(entry_list, gene_association):
 
     return matrix
 
+
 def getMeanDist(comp):
     print(f"analyzing {comp.iloc[0, 4]}...")
 
@@ -752,10 +769,11 @@ def getMeanDist(comp):
 
     return comp[comp.mean_dist > 0]
 
+
 def getHGT(matrix, gene_association):
     """
     Transform a dataframe adding the HGT bool variable; Main function to identify potential HGT events between species.
-    
+
     Parameters:
     -----------
     `matrix` : pd.DataFrame
@@ -771,9 +789,8 @@ def getHGT(matrix, gene_association):
     matrix2 = append_species(matrix, gene_association)
 
     # remove NAs
-    #matrix2 = matrix2[matrix2['dist'] != "NA"]
+    # matrix2 = matrix2[matrix2['dist'] != "NA"]
     matrix2[matrix2['dist'] == "NA"] = 0
-
     matrix2['dist'] = pd.to_numeric(matrix2['dist'], downcast='float')
 
     # initialize an empty column for the HGT score
@@ -789,6 +806,7 @@ def getHGT(matrix, gene_association):
 
     return matrix2
 
+
 def get_topology(ResultsPath):
     """
     Parameters
@@ -830,7 +848,7 @@ def get_topology(ResultsPath):
                 try:
                     diff = og_tree.compare(
                         species_tree_ete)  # use the 'compare()' function from ete3 to compute the Robinson-Foulds distance
-                    if diff["rf"] > 0:  # if the distance from the average species tree is greater than 0, store the 
+                    if diff["rf"] > 0:  # if the distance from the average species tree is greater than 0, store the
                         list_keep.append(og)
                         dict_topology[og] = 1
                 except Exception as x:
@@ -842,6 +860,7 @@ def get_topology(ResultsPath):
 
     return list_uniq
 
+
 def vennPlot(kaks_OG_list: list, tree_OG_list: list, topology_OG_list: list):
     """
     Make a Venn Diagram. Easy.
@@ -863,6 +882,7 @@ def vennPlot(kaks_OG_list: list, tree_OG_list: list, topology_OG_list: list):
                 )
     return fig
 
+
 def plotData(df):
     """
     A final function to plot the data.
@@ -883,8 +903,8 @@ def plotData(df):
 
     return fig
 
-def prepare_input(arg):
 
+def prepare_input(arg):
     # Prepare the input files, coming in a .gff format
     if arg.gffread:
         prot_path, prot_all_file, cds_all_file, gene_association_file, irregular_proteins_file = prepare_gff_input(arg)
@@ -900,7 +920,8 @@ def prepare_input(arg):
 
     # Prepare the input files, coming in a .fasta genome format
     else:
-        prot_path, prot_all_file, cds_all_file, gene_association_file, irregular_proteins_file = prepare_fasta_input(arg)
+        prot_path, prot_all_file, cds_all_file, gene_association_file, irregular_proteins_file = prepare_fasta_input(
+            arg)
         gene_association = {}
         irregular_proteins = []
         with open(gene_association_file, 'r') as f:
@@ -911,7 +932,8 @@ def prepare_input(arg):
             for i in f.readlines():
                 irregular_proteins.append(i.strip())
 
-    return prot_all_file,cds_all_file,prot_path,gene_association,irregular_proteins
+    return prot_all_file, cds_all_file, prot_path, gene_association, irregular_proteins
+
 
 if __name__ == "__main__":
 
@@ -926,17 +948,6 @@ def prepare_input(arg):
     except FileExistsError:
         print(f"Folder '{output_folder}' already exists. Wait a second??")
 
-    logfile = os.path.join(output_folder, f'log_{current_date}.txt')
-    logfile = open(logfile, 'w')
-
-    def write_and_log(msg):
-        sys.stdout.write(msg)
-        logfile.write(msg)
-        logfile.flush()
-
-    sys.stdout = write_and_log
-    sys.stderr = write_and_log
-
     results_path = os.path.join(os.getcwd(), 'input', 'results')
 
     if os.path.exists(results_path) and os.listdir(results_path):
@@ -1027,6 +1038,7 @@ def write_and_log(msg):
 
     # Create a dataframe suitable for plotting
     import plotly
+
     plot_matrix = pd.concat([dist_matrix_kaks, dist_matrix_tree], axis=0)
 
     fig = plotData(plot_matrix)
@@ -1059,6 +1071,4 @@ def write_and_log(msg):
         cmplt.to_csv(f, sep='\t', index=False)
     if irregular_candidates:
         with open(os.path.join(output_folder, 'Irregular_candidates.tsv'), 'x') as f:
-            SeqIO.write(irregular_candidates, f, 'fasta')
-
-    logfile.close()
\ No newline at end of file
+            SeqIO.write(irregular_candidates, f, 'fasta')
\ No newline at end of file
diff --git a/dualHGT/dualHGT/dualHGT.py b/dualHGT/dualHGT/dualHGT.py
index 84133c4..ab718f2 100644
--- a/dualHGT/dualHGT/dualHGT.py
+++ b/dualHGT/dualHGT/dualHGT.py
@@ -60,6 +60,7 @@ def arguments():
 
     return (args)
 
+
 def prepare_fasta_input(arg):
     """
 
@@ -107,7 +108,7 @@ def prepare_fasta_input(arg):
 
             species_association = {}
 
-            if cds_seq.seq[0:3] != 'ATG': # Check if the sequence starts with the canonical start codon
+            if cds_seq.seq[0:3] != 'ATG':  # Check if the sequence starts with the canonical start codon
                 # if not, add it to the list of irregular proteins
                 irregular_proteins.append(str(cds_seq.id))
 
@@ -138,11 +139,13 @@ def prepare_fasta_input(arg):
         with open(res_cds_file, "w") as Ffilecds:
             SeqIO.write(cds_seqlist, Ffilecds, "fasta")
 
-    gene_association_file, irregular_proteins_file = create_g_ass_and_irr_prot_files(res_path, irregular_proteins, gene_association)
+    gene_association_file, irregular_proteins_file = create_g_ass_and_irr_prot_files(res_path, irregular_proteins,
+                                                                                     gene_association)
     cds_all_file, prot_all_file = create_collection_file(res_path)
 
     return (prot_path, prot_all_file, cds_all_file, gene_association_file, irregular_proteins_file)
 
+
 def prepare_gff_input(arg):
     """
     Reads GFF files, modifies them, and performs file operations.
@@ -186,7 +189,7 @@ def prepare_gff_input(arg):
         print(f"Error creating directory '{res_path}': {e}")
         raise SystemExit
 
-    # by filtering the data trough dictionary key we can run only 1 for cycle to create 
+    # by filtering the data trough dictionary key we can run only 1 for cycle to create
     # a nested list of file with the same name by using as a key the file name without extension
 
     file_dic = defaultdict(list)
@@ -212,8 +215,8 @@ def prepare_gff_input(arg):
     #   the irregular protein filepath
     return prot_path, prot_all_file, cds_all_file, gene_association_file, irregular_proteins_file
 
-def create_collection_file(res_path):
 
+def create_collection_file(res_path):
     prot_path = os.path.join(res_path, "prot")
     cds_path = os.path.join(res_path, "cds")
 
@@ -228,6 +231,7 @@ def create_collection_file(res_path):
 
     return cds_all_file, prot_all_file
 
+
 def pool_fastamod(pair: Tuple[Bio.SeqIO.SeqRecord, Bio.SeqIO.SeqRecord]) -> Tuple[
     SeqRecord, SeqRecord, Dict, bool, str]:  # parallelized function to modify the fasta files.
 
@@ -238,7 +242,7 @@ def pool_fastamod(pair: Tuple[Bio.SeqIO.SeqRecord, Bio.SeqIO.SeqRecord]) -> Tupl
     try:
         random_name = aa.description.split("HGT=")[1]
     except IndexError:
-        #print(
+        # print(
         #    f"Error: Protein (ID = {aa.id}) has no HGT tag in the description. \nAssigning a random gene tag.")
         random_code = binascii.hexlify(os.urandom(10)).decode("utf8")
         random_name = f"gene_{random_code}"
@@ -258,6 +262,7 @@ def pool_fastamod(pair: Tuple[Bio.SeqIO.SeqRecord, Bio.SeqIO.SeqRecord]) -> Tupl
         irregular = True  # a flag to collect all irregular proteins
     return aa, cds, gene_association, irregular, random_code
 
+
 def run_gffread(arg, file_matched):
     """
     Run and parse the output of GFFREAD for each species' gff-fasta pair.
@@ -319,7 +324,8 @@ def run_gffread(arg, file_matched):
                 g_ass['species'] = res_name
                 gene_association[random_code] = g_ass
                 if irregular:
-                    irregular_proteins.append(f"{gene_association[random_code]['species']}_{gene_association[random_code]['id']}")
+                    irregular_proteins.append(
+                        f"{gene_association[random_code]['species']}_{gene_association[random_code]['id']}")
 
         with open((res_prot_file + "_mod.fasta"), "w") as Ffileaa, open((res_cds_file + "_mod.fasta"), "w") as Ffilecds:
             SeqIO.write(aa_seqlist, Ffileaa, "fasta")
@@ -328,18 +334,19 @@ def run_gffread(arg, file_matched):
         os.remove(res_cds_file)
         os.remove(res_prot_file)
 
-    gene_association_file, irregular_proteins_file = create_g_ass_and_irr_prot_files(res_path, irregular_proteins, gene_association)
+    gene_association_file, irregular_proteins_file = create_g_ass_and_irr_prot_files(res_path, irregular_proteins,
+                                                                                     gene_association)
 
     return gene_association_file, irregular_proteins_file
 
-def create_g_ass_and_irr_prot_files(res_path, irregular_proteins, gene_association):
 
+def create_g_ass_and_irr_prot_files(res_path, irregular_proteins, gene_association):
     gene_association_file = os.path.join(res_path, "gene_association.txt")
 
     with open(gene_association_file, "w") as GeneAssociationFile:
         for k, v in gene_association.items():
             # writing like: random_code - original id - species
-            GeneAssociationFile.write(str(k) + '\t'+ str(v['id']) + '\t'+ str(v['species']) + '\n')
+            GeneAssociationFile.write(str(k) + '\t' + str(v['id']) + '\t' + str(v['species']) + '\n')
 
     irregular_proteins_file = os.path.join(res_path, "irregular_proteins.txt")
 
@@ -348,6 +355,7 @@ def create_g_ass_and_irr_prot_files(res_path, irregular_proteins, gene_associati
 
     return gene_association_file, irregular_proteins_file
 
+
 def extract_pairs_from_files(res_cds_file, res_prot_file):
     iterator = []
 
@@ -359,6 +367,7 @@ def extract_pairs_from_files(res_cds_file, res_prot_file):
                 break
     return iterator
 
+
 def parse_gff(folder):
     gene_association = {}  # gene_association is a dict used to save the mapping between the tag and the actual gene name
 
@@ -382,17 +391,18 @@ def parse_gff(folder):
 
     return gene_association
 
+
 def run_orthofinder(prot_path, arg):
     """
     Runs Orthofinder on the specified directory and returns the path to the results folder.
-    
+
     Args:
     -----
         `inputPath (str)`: The path to the directory containing the proteome files (.fasta format).
         `numberThreads (int)`: The number of threads to use for the Orthofinder run.
         `extra (str)`: Extra arguments to pass in the OrthoFinder command.
         `verbose (bool)`: If True, prints the output of the Orthofinder command.
-        
+
     Returns:
     --------
         str: The path to the OrthoFinder results folder.
@@ -424,6 +434,7 @@ def run_orthofinder(prot_path, arg):
 
     return orthofinder_output_folder
 
+
 def getSpNames(ResultsPath):
     """
     Retrieve the names of the species from the species tree file, created by OrthoFinder.
@@ -440,15 +451,16 @@ def getSpNames(ResultsPath):
         # speciesNames è una lista di stringhe con i nomi delle specie
         return speciesNames
 
+
 def read_tree(info):
     """
     Reads the a genetree file and returns a list containing gene pairs, their orthogroup, and their distance.
-    
+
     ## Args:
         info (tuple): A tuple containing the path to the genetree file and the species list.
     ## Returns:
         list: A list containing gene pairs and their distances. This list corresponds to a row of the final dataframe used to estimate HGT occurrence.
-    
+
     This function is called in `parseOrthofinder()` and executed in parallel with mp.Pool()
     """
 
@@ -482,17 +494,18 @@ def read_tree(info):
 
     return distances
 
+
 def parseOrthofinder(ResultsPath: str, threads: int) -> list:
     """
     A function to parse the Orthofinder results and return a list of entries containing the gene pairs and their distances.
-    
+
     ## Args:
         ResultsPath (str): The path to the Orthofinder results folder.
         threads (int): The number of threads to use for the parsing.
     ## Returns:
         list: Data containing the gene pairs and their distances. The data comes in the form of a list of lists.
         each list is to be considered as a data entry and will contain these columns:
-            `gene1` | `gene2` | `dist` inferred by OrthoFinder | `OG` | `type` = "tree" 
+            `gene1` | `gene2` | `dist` inferred by OrthoFinder | `OG` | `type` = "tree"
     """
 
     # retrieve the species names from the species tree file
@@ -516,10 +529,11 @@ def parseOrthofinder(ResultsPath: str, threads: int) -> list:
 
     return distances
 
+
 def kaksparallel(file: str) -> list:
     """
     Runs the KaKs calculator program in the shell.
-    
+
     ## Args:
         file (str): The path to the .axt file to be processed.
     ## Returns:
@@ -533,7 +547,7 @@ def kaksparallel(file: str) -> list:
     # run the 'KAKS' command in the shell
     output = file + ".kaks"
     if not os.path.exists(output) or not os.path.getsize(output) > 0:
-        runkaks = KAKS % (file,  #the .axt file passed as input
+        runkaks = KAKS % (file,  # the .axt file passed as input
                           output,  # the output file
                           "MA")  # Model Averaging is used for the calculation
         run = subprocess.Popen(runkaks, shell=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
@@ -550,6 +564,7 @@ def kaksparallel(file: str) -> list:
 
     return list_entry
 
+
 def read_kaks_file(kaks_filename: str) -> list:
     with open(kaks_filename, newline='') as resultskaks:
         next(resultskaks)
@@ -560,6 +575,7 @@ def read_kaks_file(kaks_filename: str) -> list:
                 list_entry = [line.split('\t')[i] for i in [0, 3]]
     return list_entry
 
+
 def parseKaKs(arg, ResultsPath, proteinfilefinal, cdsfilefinal):
     """
     A function to combine the gene pairs from the Orthofinder results and the KaKs distances.
@@ -658,7 +674,6 @@ def parseKaKs(arg, ResultsPath, proteinfilefinal, cdsfilefinal):
         with mp.Pool(arg.numberThreads) as p:
             with tqdm.tqdm(total=len(axtFiles), desc="Running KaKs Calculator...") as pbar:
                 for x in p.imap_unordered(kaksparallel, axtFiles):
-                    
                     # x is a list that looks like this:
                     #   ['seq_(pair?)_name', 'Ks']
 
@@ -699,18 +714,19 @@ def parseKaKs(arg, ResultsPath, proteinfilefinal, cdsfilefinal):
 
     return distances
 
+
 def append_species(entry_list, gene_association):
     """
     Transform an entry list (a list of rows) into a canonical pd.DataFrame.
     Add to the dataframe the information regarding the species to which the genes examined belong to
-    
+
     Args:
     -----
     entry_list: list
         A list of lists containing the gene pairs and their distances.
     gene_association: dict
         A dictionary containing the gene names and the species to which they belong.
-    
+
     Returns:
     --------
     pd.DataFrame
@@ -743,6 +759,7 @@ def append_species(entry_list, gene_association):
 
     return matrix
 
+
 def getMeanDist(comp):
     print(f"analyzing {comp.iloc[0, 4]}...")
 
@@ -752,10 +769,11 @@ def getMeanDist(comp):
 
     return comp[comp.mean_dist > 0]
 
+
 def getHGT(matrix, gene_association):
     """
     Transform a dataframe adding the HGT bool variable; Main function to identify potential HGT events between species.
-    
+
     Parameters:
     -----------
     `matrix` : pd.DataFrame
@@ -788,6 +806,7 @@ def getHGT(matrix, gene_association):
 
     return matrix2
 
+
 def get_topology(ResultsPath):
     """
     Parameters
@@ -829,7 +848,7 @@ def get_topology(ResultsPath):
                 try:
                     diff = og_tree.compare(
                         species_tree_ete)  # use the 'compare()' function from ete3 to compute the Robinson-Foulds distance
-                    if diff["rf"] > 0:  # if the distance from the average species tree is greater than 0, store the 
+                    if diff["rf"] > 0:  # if the distance from the average species tree is greater than 0, store the
                         list_keep.append(og)
                         dict_topology[og] = 1
                 except Exception as x:
@@ -841,6 +860,7 @@ def get_topology(ResultsPath):
 
     return list_uniq
 
+
 def vennPlot(kaks_OG_list: list, tree_OG_list: list, topology_OG_list: list):
     """
     Make a Venn Diagram. Easy.
@@ -862,6 +882,7 @@ def vennPlot(kaks_OG_list: list, tree_OG_list: list, topology_OG_list: list):
                 )
     return fig
 
+
 def plotData(df):
     """
     A final function to plot the data.
@@ -882,8 +903,8 @@ def plotData(df):
 
     return fig
 
-def prepare_input(arg):
 
+def prepare_input(arg):
     # Prepare the input files, coming in a .gff format
     if arg.gffread:
         prot_path, prot_all_file, cds_all_file, gene_association_file, irregular_proteins_file = prepare_gff_input(arg)
@@ -899,7 +920,8 @@ def prepare_input(arg):
 
     # Prepare the input files, coming in a .fasta genome format
     else:
-        prot_path, prot_all_file, cds_all_file, gene_association_file, irregular_proteins_file = prepare_fasta_input(arg)
+        prot_path, prot_all_file, cds_all_file, gene_association_file, irregular_proteins_file = prepare_fasta_input(
+            arg)
         gene_association = {}
         irregular_proteins = []
         with open(gene_association_file, 'r') as f:
@@ -910,7 +932,8 @@ def prepare_input(arg):
             for i in f.readlines():
                 irregular_proteins.append(i.strip())
 
-    return prot_all_file,cds_all_file,prot_path,gene_association,irregular_proteins
+    return prot_all_file, cds_all_file, prot_path, gene_association, irregular_proteins
+
 
 if __name__ == "__main__":
 
@@ -925,17 +948,6 @@ def prepare_input(arg):
     except FileExistsError:
         print(f"Folder '{output_folder}' already exists. Wait a second??")
 
-    logfile = os.path.join(output_folder, 'log.txt')
-    logfile = open(logfile, 'w')
-
-    def write_and_log(msg):
-        sys.stdout.write(msg)
-        logfile.write(msg)
-        logfile.flush()
-
-    sys.stdout = write_and_log
-    sys.stderr = write_and_log
-
     results_path = os.path.join(os.getcwd(), 'input', 'results')
 
     if os.path.exists(results_path) and os.listdir(results_path):
@@ -1026,6 +1038,7 @@ def write_and_log(msg):
 
     # Create a dataframe suitable for plotting
     import plotly
+
     plot_matrix = pd.concat([dist_matrix_kaks, dist_matrix_tree], axis=0)
 
     fig = plotData(plot_matrix)
@@ -1058,6 +1071,4 @@ def write_and_log(msg):
         cmplt.to_csv(f, sep='\t', index=False)
     if irregular_candidates:
         with open(os.path.join(output_folder, 'Irregular_candidates.tsv'), 'x') as f:
-            SeqIO.write(irregular_candidates, f, 'fasta')
-
-    logfile.close()
\ No newline at end of file
+            SeqIO.write(irregular_candidates, f, 'fasta')
\ No newline at end of file