diff --git a/classes/.ipynb_checkpoints/hypergeometric_distribution_class_V2-checkpoint.py b/classes/.ipynb_checkpoints/hypergeometric_distribution_class_V2-checkpoint.py
deleted file mode 100644
index 7215725..0000000
--- a/classes/.ipynb_checkpoints/hypergeometric_distribution_class_V2-checkpoint.py
+++ /dev/null
@@ -1,162 +0,0 @@
-from classes.base_algorithm_class import BaseAlgorithm
-import networkx as nx
-import pandas as pd
-from colorama import init as colorama_init
-from colorama import Fore, Back, Style
-from pathlib import Path
-import math
-from tools.helper import print_progress, normalize, import_graph_from_pickle
-from tools.workflow import get_datasets
-
-
-class HypergeometricDistributionV2(BaseAlgorithm):
-    def __init__(self):
-        self.y_score = []
-        self.y_true = []
-
-    def get_y_score(self):
-        return self.y_score
-    
-    def get_y_true(self):
-        return self.y_true
-    
-    def set_y_score(self, y_score):
-        self.y_score = y_score
-
-    def set_y_true(self, y_true):
-        self.y_true = y_true
-
-    def predict(
-        self,
-        input_directory_path,
-        graph_file_path,
-        output_path,
-    ):
-        """
-        Uses a Hypergeometric distribution to calculate a confidence value for the relationship between a protein of 
-        interest and a GO term. Includes the protein of interest in calculations. 
-        """
-        colorama_init()
-
-        # have two sets of positive and negative protein-go_term pairs
-        # for each pair, calculate the score of how well they predict whether a protein should be annotated to a GO term.
-        # 50% of the data are proteins that are annotated to a GO term
-        # 50% of the data are proteins that are not annotated to a GO term
-
-        data = {
-            "protein": [],
-            "go_term": [],
-            "pro_pro_neighbor": [],
-            "go_neighbor": [],
-            "go_annotated_pro_pro_neighbors": [],
-            "score": [],
-            "norm_score": [],
-            "true_label": [],
-        }
-
-        positive_dataset, negative_dataset = get_datasets(input_directory_path)
-        G = import_graph_from_pickle(graph_file_path)
-
-        i = 1
-        for positive_protein, positive_go, negative_protein, negative_go in zip(
-            positive_dataset["protein"],
-            positive_dataset["go"],
-            negative_dataset["protein"],
-            negative_dataset["go"],
-        ):
-
-            # calculate the score for the positive set
-            positive_pro_pro_neighbor = get_neighbors(
-                G, positive_protein, "protein_protein"
-            )
-            positive_go_neighbor = get_neighbors(G, positive_go, "protein_go_term")
-            positive_go_annotated_pro_pro_neighbor_count = (
-                get_go_annotated_pro_pro_neighbor_count(
-                    G, positive_pro_pro_neighbor, positive_go
-                )
-            )
-            
-            N = len([x for x,y in G.nodes(data=True) if y['type']=="protein"]) #Total number of protein nodes in the entire graph
-            pos_n = len(positive_pro_pro_neighbor) + 1 #Number of protein neighbors the protein of interest has (includes the protein of interest)
-            K = len(positive_go_neighbor) #Number of protein neighbors the GO term of interest has, same for pos & neg
-            pos_k = positive_go_annotated_pro_pro_neighbor_count + 1 #The overlap between the GO protein neighbors and protein neighbors of the protein of interest (includes the protein of interest)
-
-            #The hypergeometric function using variables above, math.comb(n,k) is an n choose k function
-            positive_score = 1 - ((math.comb(K,pos_k)*math.comb(N-K,pos_n-pos_k))/math.comb(N,pos_n))
-
-            # calculate the score for the negative set
-            negative_pro_pro_neighbor = get_neighbors(
-                G, negative_protein, "protein_protein"
-            )
-            negative_go_neighbor = get_neighbors(G, negative_go, "protein_go_term")
-            negative_go_annotated_protein_neighbor_count = (
-                get_go_annotated_pro_pro_neighbor_count(
-                    G, negative_pro_pro_neighbor, negative_go
-                )
-            )
-
-            neg_n = len(negative_pro_pro_neighbor) + 1 #Negative protein of interest neighbors (includes self)
-            neg_k = negative_go_annotated_protein_neighbor_count #Overlap betweesn go neighbors and protein neighbors (should be fewer for neg than pos)
-
-            negative_score = 1 - ((math.comb(K,neg_k)*math.comb(N-K,neg_n-neg_k))/math.comb(N,neg_n))
-
-            # input positive and negative score to data
-            data["protein"].append(positive_protein)
-            data["go_term"].append(positive_go)
-            data["pro_pro_neighbor"].append(len(positive_pro_pro_neighbor))
-            data["go_neighbor"].append(len(positive_go_neighbor))
-            data["go_annotated_pro_pro_neighbors"].append(
-                positive_go_annotated_pro_pro_neighbor_count
-            )
-            data["score"].append(positive_score)
-            data["true_label"].append(1)
-
-            data["protein"].append(negative_protein)
-            data["go_term"].append(negative_go)
-            data["pro_pro_neighbor"].append(len(negative_pro_pro_neighbor))
-            data["go_neighbor"].append(len(negative_go_neighbor))
-            data["go_annotated_pro_pro_neighbors"].append(
-                negative_go_annotated_protein_neighbor_count
-            )
-            data["score"].append(negative_score)
-            data["true_label"].append(0)
-
-            print_progress(i, len(positive_dataset["protein"]))
-            i += 1
-
-        normalized_data = normalize(data["score"])
-        for item in normalized_data:
-            data["norm_score"].append(item)
-
-        df = pd.DataFrame(data)
-        df = df.sort_values(by="norm_score", ascending=False)
-
-        df.to_csv(
-            Path(output_path, "hypergeometricdistributionV3.csv"),
-            index=False,
-            sep="\t",
-        )
-
-        y_score = df["norm_score"].to_list()
-        y_true = df["true_label"].to_list()
-
-        return y_score, y_true
-
-
-def get_neighbors(G: nx.Graph, node, edgeType):
-    res = G.edges(node, data=True)
-    neighbors = []
-    for edge in res:
-        if edge[2]["type"] == edgeType:
-            neighborNode = [edge[1], edge[2]]
-            neighbors.append(neighborNode)
-
-    return neighbors
-
-
-def get_go_annotated_pro_pro_neighbor_count(G: nx.Graph, nodeList, goTerm):
-    count = 0
-    for element in nodeList:
-        if G.has_edge(element[0], goTerm):
-            count += 1
-    return count
diff --git a/classes/.ipynb_checkpoints/hypergeometric_distribution_class_V3-checkpoint.py b/classes/.ipynb_checkpoints/hypergeometric_distribution_class_V3-checkpoint.py
deleted file mode 100644
index 9654e8f..0000000
--- a/classes/.ipynb_checkpoints/hypergeometric_distribution_class_V3-checkpoint.py
+++ /dev/null
@@ -1,164 +0,0 @@
-from classes.base_algorithm_class import BaseAlgorithm
-import networkx as nx
-import pandas as pd
-from colorama import init as colorama_init
-from colorama import Fore, Back, Style
-from pathlib import Path
-import math
-from tools.helper import print_progress, normalize, import_graph_from_pickle
-from tools.workflow import get_datasets
-
-
-class HypergeometricDistributionV3(BaseAlgorithm):
-    def __init__(self):
-        self.y_score = []
-        self.y_true = []
-
-    def get_y_score(self):
-        return self.y_score
-    
-    def get_y_true(self):
-        return self.y_true
-    
-    def set_y_score(self, y_score):
-        self.y_score = y_score
-
-    def set_y_true(self, y_true):
-        self.y_true = y_true
-
-    def predict(
-        self,
-        input_directory_path,
-        graph_file_path,
-        output_path,
-    ):
-        """
-        Uses a Hypergeometric distribution to calculate a confidence value for the relationship between a protein of 
-        interest and a GO term. Only uses proteins inside the sub-network (comprised of proteins linked with the protein 
-        of interest and/or the GO term). Does not include the protein of interest.
-        """
-        colorama_init()
-
-        # have two sets of positive and negative protein-go_term pairs
-        # for each pair, calculate the score of how well they predict whether a protein should be annotated to a GO term.
-        # 50% of the data are proteins that are annotated to a GO term
-        # 50% of the data are proteins that are not annotated to a GO term
-
-        data = {
-            "protein": [],
-            "go_term": [],
-            "pro_pro_neighbor": [],
-            "go_neighbor": [],
-            "go_annotated_pro_pro_neighbors": [],
-            "score": [],
-            "norm_score": [],
-            "true_label": [],
-        }
-
-        positive_dataset, negative_dataset = get_datasets(input_directory_path)
-        G = import_graph_from_pickle(graph_file_path)
-
-        i = 1
-        for positive_protein, positive_go, negative_protein, negative_go in zip(
-            positive_dataset["protein"],
-            positive_dataset["go"],
-            negative_dataset["protein"],
-            negative_dataset["go"],
-        ):
-
-            # calculate the score for the positive set
-            positive_pro_pro_neighbor = get_neighbors(
-                G, positive_protein, "protein_protein"
-            )
-            positive_go_neighbor = get_neighbors(G, positive_go, "protein_go_term")
-            positive_go_annotated_pro_pro_neighbor_count = (
-                get_go_annotated_pro_pro_neighbor_count(
-                    G, positive_pro_pro_neighbor, positive_go
-                )
-            )
-            
-            pos_N = len(positive_pro_pro_neighbor) + len(positive_go_neighbor) -positive_go_annotated_pro_pro_neighbor_count - 1 #Sample size is only the neighbors of the protein & GO term of interest
-            pos_n = len(positive_pro_pro_neighbor) #Number of protein neighbors the protein of interest has
-            K = len(positive_go_neighbor) - 1 #Number of protein neighbors the GO term of interest has, same for pos & neg, does not include the protein of interest
-            pos_k = positive_go_annotated_pro_pro_neighbor_count #The overlap between the GO term and the protein of interst's neighbor proteins
-
-            #The hypergeometric function using variables above, math.comb(n,k) is an n choose k function
-            positive_score = 1 - ((math.comb(K,pos_k)*math.comb(pos_N-K,pos_n-pos_k))/math.comb(pos_N,pos_n))
-
-            # calculate the score for the negative set
-            negative_pro_pro_neighbor = get_neighbors(
-                G, negative_protein, "protein_protein"
-            )
-            negative_go_neighbor = get_neighbors(G, negative_go, "protein_go_term")
-            negative_go_annotated_protein_neighbor_count = (
-                get_go_annotated_pro_pro_neighbor_count(
-                    G, negative_pro_pro_neighbor, negative_go
-                )
-            )
-
-            neg_N = len(negative_pro_pro_neighbor) + len(negative_go_neighbor) - negative_go_annotated_protein_neighbor_count 
-            neg_n = len(negative_pro_pro_neighbor) 
-            neg_k = negative_go_annotated_protein_neighbor_count
-
-            negative_score = 1 - ((math.comb(K,neg_k)*math.comb(neg_N-K,neg_n-neg_k))/math.comb(neg_N,neg_n))
-
-            # input positive and negative score to data
-            data["protein"].append(positive_protein)
-            data["go_term"].append(positive_go)
-            data["pro_pro_neighbor"].append(len(positive_pro_pro_neighbor))
-            data["go_neighbor"].append(len(positive_go_neighbor))
-            data["go_annotated_pro_pro_neighbors"].append(
-                positive_go_annotated_pro_pro_neighbor_count
-            )
-            data["score"].append(positive_score)
-            data["true_label"].append(1)
-
-            data["protein"].append(negative_protein)
-            data["go_term"].append(negative_go)
-            data["pro_pro_neighbor"].append(len(negative_pro_pro_neighbor))
-            data["go_neighbor"].append(len(negative_go_neighbor))
-            data["go_annotated_pro_pro_neighbors"].append(
-                negative_go_annotated_protein_neighbor_count
-            )
-            data["score"].append(negative_score)
-            data["true_label"].append(0)
-
-            print_progress(i, len(positive_dataset["protein"]))
-            i += 1
-
-        normalized_data = normalize(data["score"])
-        for item in normalized_data:
-            data["norm_score"].append(item)
-
-        df = pd.DataFrame(data)
-        df = df.sort_values(by="norm_score", ascending=False)
-
-        df.to_csv(
-            Path(output_path, "hypergeometricdistribution.csv"),
-            index=False,
-            sep="\t",
-        )
-
-        y_score = df["norm_score"].to_list()
-        y_true = df["true_label"].to_list()
-
-        return y_score, y_true
-
-
-def get_neighbors(G: nx.Graph, node, edgeType):
-    res = G.edges(node, data=True)
-    neighbors = []
-    for edge in res:
-        if edge[2]["type"] == edgeType:
-            neighborNode = [edge[1], edge[2]]
-            neighbors.append(neighborNode)
-
-    return neighbors
-
-
-def get_go_annotated_pro_pro_neighbor_count(G: nx.Graph, nodeList, goTerm):
-    count = 0
-    for element in nodeList:
-        if G.has_edge(element[0], goTerm):
-            count += 1
-    return count
diff --git a/tools/.ipynb_checkpoints/helper-checkpoint.py b/tools/.ipynb_checkpoints/helper-checkpoint.py
deleted file mode 100644
index 3be6cee..0000000
--- a/tools/.ipynb_checkpoints/helper-checkpoint.py
+++ /dev/null
@@ -1,154 +0,0 @@
-from colorama import Fore, Style
-import networkx as nx
-import random
-import numpy as np
-import pickle
-
-
-def print_progress(current, total, bar_length=65):
-    # Calculate the progress as a percentage
-    percent = float(current) / total
-    # Determine the number of hash marks in the progress bar
-    arrow = "-" * int(round(percent * bar_length) - 1) + ">"
-    spaces = " " * (bar_length - len(arrow))
-
-    # Choose color based on completion
-    if current < total:
-        color = Fore.YELLOW
-    else:
-        color = Fore.GREEN
-
-    # Construct the progress bar string
-    progress_bar = f"[{arrow + spaces}] {int(round(percent * 100))}%"
-
-    # Print the progress bar with color, overwriting the previous line
-    print(f"\r{color}{progress_bar}{Style.RESET_ALL}", end="")
-
-
-def create_ppi_network(fly_interactome, fly_GO_term):
-    print("Initializing network")
-    i = 1
-    total_progress = len(fly_interactome) + len(fly_GO_term)
-    G = nx.Graph()
-    protein_protein_edge = 0
-    protein_go_edge = 0
-    protein_node = 0
-    go_node = 0
-    protein_list = []
-    go_term_list = []
-
-    # go through fly interactome, add a new node if it doesnt exists already, then add their physical interactions as edges
-    for line in fly_interactome:
-        if not G.has_node(line[2]):
-            G.add_node(line[2], name=line[0], type="protein")
-            protein_list.append({"id": line[2], "name": line[0]})
-            protein_node += 1
-
-        if not G.has_node(line[3]):
-            G.add_node(line[3], name=line[1], type="protein")
-            protein_list.append({"id": line[3], "name": line[1]})
-            protein_node += 1
-
-        G.add_edge(line[2], line[3], type="protein_protein")
-        protein_protein_edge += 1
-        print_progress(i, total_progress)
-        i += 1
-
-    # Proteins annotated with a GO term have an edge to a GO term node
-    for line in fly_GO_term:
-        if not G.has_node(line[1]):
-            G.add_node(line[1], type="go_term")
-            go_term_list.append(line[1])
-            go_node += 1
-
-        if not G.has_node(line[0]):
-            G.add_node(line[0], name=line[0], type="protein")
-            protein_list.append({"id": line[0], "name": line[0]})
-            protein_node += 1
-
-        G.add_edge(line[1], line[0], type="protein_go_term")
-        protein_go_edge += 1
-        print_progress(i, total_progress)
-        i += 1
-
-    print("")
-    print("")
-    print("network summary")
-
-    print("protein-protein edge count: ", protein_protein_edge)
-    print("protein-go edge count: ", protein_go_edge)
-    print("protein node count: ", protein_node)
-    print("go node count: ", go_node)
-    print("total edge count: ", len(G.edges()))
-    print("total node count: ", len(G.nodes()))
-
-    return G, protein_list
-
-
-def read_specific_columns(file_path, columns, delimit):
-    try:
-        with open(file_path, "r") as file:
-            next(file)
-            data = []
-            for line in file:
-                parts = line.strip().split(delimit)
-                selected_columns = []
-                for col in columns:
-                    selected_columns.append(parts[col].replace('"', ""))
-                data.append(selected_columns)
-            return data
-    except FileNotFoundError:
-        print(f"Error: File '{file_path}' not found.")
-        return None
-    except Exception as e:
-        print(f"An error occurred: {e}")
-        return None
-
-
-def generate_random_colors(num_colors):
-    colors = []
-    for _ in range(num_colors):
-        color = (random.random(), random.random(), random.random())
-        colors.append(color)
-    return colors
-
-
-def normalize(data):
-    data = np.array(data)
-    min_val = data.min()
-    max_val = data.max()
-
-    if min_val == max_val:
-        return np.zeros_like(data)
-
-    normalized_data = (data - min_val) / (max_val - min_val)
-    return normalized_data.tolist()
-
-
-def get_neighbors(G: nx.Graph, node, edgeType):
-    res = G.edges(node, data=True)
-    neighbors = []
-    for edge in res:
-        if edge[2]["type"] == edgeType:
-            neighborNode = [edge[1], edge[2]]
-            neighbors.append(neighborNode)
-
-    return neighbors
-
-
-def add_print_statements(filename, statements):
-    # Open the file in append mode (will create the file if it doesn't exist)
-    with open(filename, "w") as file:
-        for statement in statements:
-            # Write each statement to the file
-            file.write(f"{statement}\n")
-
-
-def export_graph_to_pickle(graph, filename):
-    with open(filename, 'wb') as f:
-        pickle.dump(graph, f)
-
-
-def import_graph_from_pickle(filename):
-    with open(filename, 'rb') as f:
-        return pickle.load(f)